sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
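# Illustrative sketch, not part of the module: each builder above receives the parsed
# argument list of a SQL function call and returns the matching AST node. For example,
# build_var_map pairs alternating keys and values:
#
#   args = [exp.Literal.string("a"), exp.Literal.number(1),
#           exp.Literal.string("b"), exp.Literal.number(2)]
#   build_var_map(args)  # -> VarMap(keys=ARRAY('a', 'b'), values=ARRAY(1, 2))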
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
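# Illustration (an assumption about sqlglot.trie, not code from this module): the tries
# built by the metaclass let multi-word keys be matched one token at a time, e.g.
#
#   new_trie(key.split(" ") for key in ("TABLES", "TABLES FROM"))
#
# yields a nested dict that the SHOW/SET parsing helpers can walk with in_trie,
# preferring the longest matching key without backtracking.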
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
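    # Sketch (illustration only): dialect parsers typically extend this table rather
    # than replace it, e.g.
    #
    #   class MyDialectParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #       }
    #
    # MyDialectParser and MY_FUNC are hypothetical names.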
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }
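    # Illustration (assumption about the type parser): this mapping is consulted when
    # a type keyword is followed by UNSIGNED in dialects that support it (e.g. MySQL),
    # so "INT UNSIGNED" resolves to TokenType.UINT rather than a separate modifier.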
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
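    # Illustration: the set algebra above is what keeps join keywords from being eaten
    # as aliases; e.g. in "SELECT * FROM t LEFT JOIN u", LEFT is excluded from
    # TABLE_ALIAS_TOKENS, so it starts a join instead of aliasing t.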
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
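    # Illustration: TokenType.ARROW above covers higher-order lambdas such as
    # "TRANSFORM(xs, x -> x + 1)"; _replace_lambda rewires references to `x` in the
    # parsed body so they resolve to the lambda parameter rather than a column.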
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
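    # Usage sketch (illustration): EXPRESSION_PARSERS is what backs parse_into, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sqlglot.parse_one("x = 1 AND y = 2", into=exp.Condition)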
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }
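    # Illustration: PLACEHOLDER_PARSERS is why "?" parses to a bare exp.Placeholder,
    # while ":name" (in dialects whose tokenizer emits TokenType.COLON followed by an
    # identifier) becomes exp.Placeholder(this="name").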
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
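    # Illustration: binary_range_parser (defined above) turns a token into a binary
    # predicate parser, so "a GLOB 'x*'" becomes exp.Glob(this=a, expression='x*');
    # reverse_args flips the operands for operators like <@ that read right-to-left.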
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
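    # Illustration: these keys are matched against upper-cased keyword sequences while
    # parsing DDL properties; e.g. "PARTITIONED BY" is what turns
    # "CREATE TABLE t (a INT) PARTITIONED BY (a)" into a partitioned-by property node.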
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
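    # Illustration: for a column like "age INT NOT NULL CHECK (age > 0)", the "NOT"
    # key dispatches to _parse_not_constraint (NOT NULL) and "CHECK" wraps the parsed
    # condition in exp.CheckColumnConstraint.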
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
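    # Illustration: FUNCTION_PARSERS exists for functions whose call syntax is not a
    # plain argument list, e.g. EXTRACT(YEAR FROM x), TRIM(BOTH ' ' FROM s) and
    # CAST(x AS INT) each need a dedicated _parse_* method.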
("group", self._parse_group()), 1185 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1186 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1187 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1188 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1189 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1190 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1191 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1192 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1193 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1194 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1195 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1196 TokenType.CLUSTER_BY: lambda self: ( 1197 "cluster", 1198 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1199 ), 1200 TokenType.DISTRIBUTE_BY: lambda self: ( 1201 "distribute", 1202 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1203 ), 1204 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1205 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1206 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1207 } 1208 1209 SET_PARSERS = { 1210 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1211 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1212 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1213 "TRANSACTION": lambda self: self._parse_set_transaction(), 1214 } 1215 1216 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1217 1218 TYPE_LITERAL_PARSERS = { 1219 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1220 } 1221 1222 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1223 1224 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1225 1226 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1227 1228 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1229 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1230 "ISOLATION": ( 1231 ("LEVEL", "REPEATABLE", "READ"), 1232 ("LEVEL", "READ", "COMMITTED"), 1233 ("LEVEL", "READ", "UNCOMITTED"), 1234 ("LEVEL", "SERIALIZABLE"), 1235 ), 1236 "READ": ("WRITE", "ONLY"), 1237 } 1238 1239 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1240 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1241 ) 1242 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1243 1244 CREATE_SEQUENCE: OPTIONS_TYPE = { 1245 "SCALE": ("EXTEND", "NOEXTEND"), 1246 "SHARD": ("EXTEND", "NOEXTEND"), 1247 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1248 **dict.fromkeys( 1249 ( 1250 "SESSION", 1251 "GLOBAL", 1252 "KEEP", 1253 "NOKEEP", 1254 "ORDER", 1255 "NOORDER", 1256 "NOCACHE", 1257 "CYCLE", 1258 "NOCYCLE", 1259 "NOMINVALUE", 1260 "NOMAXVALUE", 1261 "NOSCALE", 1262 "NOSHARD", 1263 ), 1264 tuple(), 1265 ), 1266 } 1267 1268 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1269 1270 USABLES: OPTIONS_TYPE = dict.fromkeys( 1271 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1272 ) 1273 1274 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1275 1276 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1277 "TYPE": ("EVOLUTION",), 1278 **dict.fromkeys(("BINDING", "COMPENSATION", 
"EVOLUTION"), tuple()), 1279 } 1280 1281 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1282 1283 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1284 1285 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1286 "NOT": ("ENFORCED",), 1287 "MATCH": ( 1288 "FULL", 1289 "PARTIAL", 1290 "SIMPLE", 1291 ), 1292 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1293 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1294 } 1295 1296 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1297 1298 CLONE_KEYWORDS = {"CLONE", "COPY"} 1299 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1300 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1301 1302 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1303 1304 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1305 1306 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1307 1308 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1309 1310 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1311 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1312 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1313 1314 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1315 1316 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1317 1318 ADD_CONSTRAINT_TOKENS = { 1319 TokenType.CONSTRAINT, 1320 TokenType.FOREIGN_KEY, 1321 TokenType.INDEX, 1322 TokenType.KEY, 1323 TokenType.PRIMARY_KEY, 1324 TokenType.UNIQUE, 1325 } 1326 1327 DISTINCT_TOKENS = {TokenType.DISTINCT} 1328 1329 NULL_TOKENS = {TokenType.NULL} 1330 1331 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1332 1333 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1334 1335 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1336 1337 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1338 1339 ODBC_DATETIME_LITERALS = { 1340 "d": exp.Date, 1341 "t": exp.Time, 1342 "ts": exp.Timestamp, 1343 } 1344 1345 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1346 1347 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1348 1349 # The style options for the DESCRIBE statement 1350 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1351 1352 # The style options for the ANALYZE statement 1353 ANALYZE_STYLES = { 1354 "BUFFER_USAGE_LIMIT", 1355 "FULL", 1356 "LOCAL", 1357 "NO_WRITE_TO_BINLOG", 1358 "SAMPLE", 1359 "SKIP_LOCKED", 1360 "VERBOSE", 1361 } 1362 1363 ANALYZE_EXPRESSION_PARSERS = { 1364 "ALL": lambda self: self._parse_analyze_columns(), 1365 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1366 "DELETE": lambda self: self._parse_analyze_delete(), 1367 "DROP": lambda self: self._parse_analyze_histogram(), 1368 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1369 "LIST": lambda self: self._parse_analyze_list(), 1370 "PREDICATE": lambda self: self._parse_analyze_columns(), 1371 "UPDATE": lambda self: self._parse_analyze_histogram(), 1372 "VALIDATE": lambda self: self._parse_analyze_validate(), 1373 } 1374 1375 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1376 1377 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1378 1379 OPERATION_MODIFIERS: t.Set[str] = set() 1380 1381 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1382 1383 STRICT_CAST = True 1384 1385 PREFIXED_PIVOT_COLUMNS = False 1386 IDENTIFY_PIVOT_STRINGS = False 1387 1388 LOG_DEFAULTS_TO_LN = False 1389 
    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
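    # Minimal usage sketch (illustration, assuming the default dialect):
    #
    #   from sqlglot import Parser, Tokenizer
    #   sql = "SELECT a FROM t"
    #   expressions = Parser().parse(Tokenizer().tokenize(sql), sql)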
1497 """ 1498 return self._parse( 1499 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1500 ) 1501 1502 def parse_into( 1503 self, 1504 expression_types: exp.IntoType, 1505 raw_tokens: t.List[Token], 1506 sql: t.Optional[str] = None, 1507 ) -> t.List[t.Optional[exp.Expression]]: 1508 """ 1509 Parses a list of tokens into a given Expression type. If a collection of Expression 1510 types is given instead, this method will try to parse the token list into each one 1511 of them, stopping at the first for which the parsing succeeds. 1512 1513 Args: 1514 expression_types: The expression type(s) to try and parse the token list into. 1515 raw_tokens: The list of tokens. 1516 sql: The original SQL string, used to produce helpful debug messages. 1517 1518 Returns: 1519 The target Expression. 1520 """ 1521 errors = [] 1522 for expression_type in ensure_list(expression_types): 1523 parser = self.EXPRESSION_PARSERS.get(expression_type) 1524 if not parser: 1525 raise TypeError(f"No parser registered for {expression_type}") 1526 1527 try: 1528 return self._parse(parser, raw_tokens, sql) 1529 except ParseError as e: 1530 e.errors[0]["into_expression"] = expression_type 1531 errors.append(e) 1532 1533 raise ParseError( 1534 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1535 errors=merge_errors(errors), 1536 ) from errors[-1] 1537 1538 def _parse( 1539 self, 1540 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1541 raw_tokens: t.List[Token], 1542 sql: t.Optional[str] = None, 1543 ) -> t.List[t.Optional[exp.Expression]]: 1544 self.reset() 1545 self.sql = sql or "" 1546 1547 total = len(raw_tokens) 1548 chunks: t.List[t.List[Token]] = [[]] 1549 1550 for i, token in enumerate(raw_tokens): 1551 if token.token_type == TokenType.SEMICOLON: 1552 if token.comments: 1553 chunks.append([token]) 1554 1555 if i < total - 1: 1556 chunks.append([]) 1557 else: 1558 chunks[-1].append(token) 1559 1560 expressions = [] 1561 1562 for tokens in chunks: 1563 self._index = -1 1564 self._tokens = tokens 1565 self._advance() 1566 1567 expressions.append(parse_method(self)) 1568 1569 if self._index < len(self._tokens): 1570 self.raise_error("Invalid expression / Unexpected token") 1571 1572 self.check_errors() 1573 1574 return expressions 1575 1576 def check_errors(self) -> None: 1577 """Logs or raises any found errors, depending on the chosen error level setting.""" 1578 if self.error_level == ErrorLevel.WARN: 1579 for error in self.errors: 1580 logger.error(str(error)) 1581 elif self.error_level == ErrorLevel.RAISE and self.errors: 1582 raise ParseError( 1583 concat_messages(self.errors, self.max_errors), 1584 errors=merge_errors(self.errors), 1585 ) 1586 1587 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1588 """ 1589 Appends an error in the list of recorded errors or raises it, depending on the chosen 1590 error level setting. 1591 """ 1592 token = token or self._curr or self._prev or Token.string("") 1593 start = token.start 1594 end = token.end + 1 1595 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1596 highlight = self.sql[start:end] 1597 end_context = self.sql[end : end + self.error_message_context] 1598 1599 error = ParseError.new( 1600 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
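    # Illustration: parsing methods speculate by saving self._index, attempting a
    # parse, and calling self._retreat(index) on failure; _try_parse below bundles
    # that save/restore pattern with a temporary ErrorLevel.IMMEDIATE so inner
    # failures surface as ParseError instead of being silently recorded.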
1701 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1702 solve this by setting & resetting the parser state accordingly 1703 """ 1704 index = self._index 1705 error_level = self.error_level 1706 1707 self.error_level = ErrorLevel.IMMEDIATE 1708 try: 1709 this = parse_method() 1710 except ParseError: 1711 this = None 1712 finally: 1713 if not this or retreat: 1714 self._retreat(index) 1715 self.error_level = error_level 1716 1717 return this 1718 1719 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1720 start = self._prev 1721 exists = self._parse_exists() if allow_exists else None 1722 1723 self._match(TokenType.ON) 1724 1725 materialized = self._match_text_seq("MATERIALIZED") 1726 kind = self._match_set(self.CREATABLES) and self._prev 1727 if not kind: 1728 return self._parse_as_command(start) 1729 1730 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1731 this = self._parse_user_defined_function(kind=kind.token_type) 1732 elif kind.token_type == TokenType.TABLE: 1733 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1734 elif kind.token_type == TokenType.COLUMN: 1735 this = self._parse_column() 1736 else: 1737 this = self._parse_id_var() 1738 1739 self._match(TokenType.IS) 1740 1741 return self.expression( 1742 exp.Comment, 1743 this=this, 1744 kind=kind.text, 1745 expression=self._parse_string(), 1746 exists=exists, 1747 materialized=materialized, 1748 ) 1749 1750 def _parse_to_table( 1751 self, 1752 ) -> exp.ToTableProperty: 1753 table = self._parse_table_parts(schema=True) 1754 return self.expression(exp.ToTableProperty, this=table) 1755 1756 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1757 def _parse_ttl(self) -> exp.Expression: 1758 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1759 this = self._parse_bitwise() 1760 1761 if self._match_text_seq("DELETE"): 1762 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1763 if self._match_text_seq("RECOMPRESS"): 1764 return self.expression( 1765 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1766 ) 1767 if self._match_text_seq("TO", "DISK"): 1768 return self.expression( 1769 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1770 ) 1771 if self._match_text_seq("TO", "VOLUME"): 1772 return self.expression( 1773 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1774 ) 1775 1776 return this 1777 1778 expressions = self._parse_csv(_parse_ttl_action) 1779 where = self._parse_where() 1780 group = self._parse_group() 1781 1782 aggregates = None 1783 if group and self._match(TokenType.SET): 1784 aggregates = self._parse_csv(self._parse_set_item) 1785 1786 return self.expression( 1787 exp.MergeTreeTTL, 1788 expressions=expressions, 1789 where=where, 1790 group=group, 1791 aggregates=aggregates, 1792 ) 1793 1794 def _parse_statement(self) -> t.Optional[exp.Expression]: 1795 if self._curr is None: 1796 return None 1797 1798 if self._match_set(self.STATEMENT_PARSERS): 1799 comments = self._prev_comments 1800 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1801 stmt.add_comments(comments, prepend=True) 1802 return stmt 1803 1804 if self._match_set(self.dialect.tokenizer.COMMANDS): 1805 return self._parse_command() 1806 1807 expression = self._parse_expression() 1808 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1809 return self._parse_query_modifiers(expression) 
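# ------------------------------------------------------------------------
# Editor's sketch (not part of the library source): the dispatch in
# _parse_statement above is normally reached through sqlglot's public API,
# which tokenizes the SQL and calls Parser.parse / Parser.parse_into on the
# resulting tokens. A minimal illustration:
#
#     import sqlglot
#     from sqlglot import exp
#
#     # CREATE is registered in STATEMENT_PARSERS, so this dispatches to
#     # _parse_create
#     sqlglot.parse_one("CREATE TABLE t (c INT)")
#
#     # SELECT is not in STATEMENT_PARSERS and falls through to the
#     # expression/select branch at the bottom of _parse_statement
#     sqlglot.parse_one("SELECT 1 UNION SELECT 2")
#
#     # The `into` kwarg routes through parse_into, which tries each
#     # candidate type and raises a merged ParseError if none succeeds
#     # (assuming exp.Condition is registered in EXPRESSION_PARSERS, as in
#     # stock sqlglot)
#     sqlglot.parse_one("x > 5", into=exp.Condition)
# ------------------------------------------------------------------------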
1810 1811 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1812 start = self._prev 1813 temporary = self._match(TokenType.TEMPORARY) 1814 materialized = self._match_text_seq("MATERIALIZED") 1815 1816 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1817 if not kind: 1818 return self._parse_as_command(start) 1819 1820 concurrently = self._match_text_seq("CONCURRENTLY") 1821 if_exists = exists or self._parse_exists() 1822 1823 if kind == "COLUMN": 1824 this = self._parse_column() 1825 else: 1826 this = self._parse_table_parts( 1827 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1828 ) 1829 1830 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1831 1832 if self._match(TokenType.L_PAREN, advance=False): 1833 expressions = self._parse_wrapped_csv(self._parse_types) 1834 else: 1835 expressions = None 1836 1837 return self.expression( 1838 exp.Drop, 1839 exists=if_exists, 1840 this=this, 1841 expressions=expressions, 1842 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1843 temporary=temporary, 1844 materialized=materialized, 1845 cascade=self._match_text_seq("CASCADE"), 1846 constraints=self._match_text_seq("CONSTRAINTS"), 1847 purge=self._match_text_seq("PURGE"), 1848 cluster=cluster, 1849 concurrently=concurrently, 1850 ) 1851 1852 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1853 return ( 1854 self._match_text_seq("IF") 1855 and (not not_ or self._match(TokenType.NOT)) 1856 and self._match(TokenType.EXISTS) 1857 ) 1858 1859 def _parse_create(self) -> exp.Create | exp.Command: 1860 # Note: this can't be None because we've matched a statement parser 1861 start = self._prev 1862 1863 replace = ( 1864 start.token_type == TokenType.REPLACE 1865 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1866 or self._match_pair(TokenType.OR, TokenType.ALTER) 1867 ) 1868 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1869 1870 unique = self._match(TokenType.UNIQUE) 1871 1872 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1873 clustered = True 1874 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1875 "COLUMNSTORE" 1876 ): 1877 clustered = False 1878 else: 1879 clustered = None 1880 1881 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1882 self._advance() 1883 1884 properties = None 1885 create_token = self._match_set(self.CREATABLES) and self._prev 1886 1887 if not create_token: 1888 # exp.Properties.Location.POST_CREATE 1889 properties = self._parse_properties() 1890 create_token = self._match_set(self.CREATABLES) and self._prev 1891 1892 if not properties or not create_token: 1893 return self._parse_as_command(start) 1894 1895 concurrently = self._match_text_seq("CONCURRENTLY") 1896 exists = self._parse_exists(not_=True) 1897 this = None 1898 expression: t.Optional[exp.Expression] = None 1899 indexes = None 1900 no_schema_binding = None 1901 begin = None 1902 end = None 1903 clone = None 1904 1905 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1906 nonlocal properties 1907 if properties and temp_props: 1908 properties.expressions.extend(temp_props.expressions) 1909 elif temp_props: 1910 properties = temp_props 1911 1912 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1913 this = self._parse_user_defined_function(kind=create_token.token_type) 1914 1915 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1916 
extend_props(self._parse_properties())
1917
1918 expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
1919 extend_props(self._parse_properties())
1920
1921 if not expression:
1922 if self._match(TokenType.COMMAND):
1923 expression = self._parse_as_command(self._prev)
1924 else:
1925 begin = self._match(TokenType.BEGIN)
1926 return_ = self._match_text_seq("RETURN")
1927
1928 if self._match(TokenType.STRING, advance=False):
1929 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
1930 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
1931 expression = self._parse_string()
1932 extend_props(self._parse_properties())
1933 else:
1934 expression = self._parse_user_defined_function_expression()
1935
1936 end = self._match_text_seq("END")
1937
1938 if return_:
1939 expression = self.expression(exp.Return, this=expression)
1940 elif create_token.token_type == TokenType.INDEX:
1941 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
1942 if not self._match(TokenType.ON):
1943 index = self._parse_id_var()
1944 anonymous = False
1945 else:
1946 index = None
1947 anonymous = True
1948
1949 this = self._parse_index(index=index, anonymous=anonymous)
1950 elif create_token.token_type in self.DB_CREATABLES:
1951 table_parts = self._parse_table_parts(
1952 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
1953 )
1954
1955 # exp.Properties.Location.POST_NAME
1956 self._match(TokenType.COMMA)
1957 extend_props(self._parse_properties(before=True))
1958
1959 this = self._parse_schema(this=table_parts)
1960
1961 # exp.Properties.Location.POST_SCHEMA and POST_WITH
1962 extend_props(self._parse_properties())
1963
1964 self._match(TokenType.ALIAS)
1965 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
1966 # exp.Properties.Location.POST_ALIAS
1967 extend_props(self._parse_properties())
1968
1969 if create_token.token_type == TokenType.SEQUENCE:
1970 expression = self._parse_types()
1971 extend_props(self._parse_properties())
1972 else:
1973 expression = self._parse_ddl_select()
1974
1975 if create_token.token_type == TokenType.TABLE:
1976 # exp.Properties.Location.POST_EXPRESSION
1977 extend_props(self._parse_properties())
1978
1979 indexes = []
1980 while True:
1981 index = self._parse_index()
1982
1983 # exp.Properties.Location.POST_INDEX
1984 extend_props(self._parse_properties())
1985 if not index:
1986 break
1987 else:
1988 self._match(TokenType.COMMA)
1989 indexes.append(index)
1990 elif create_token.token_type == TokenType.VIEW:
1991 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1992 no_schema_binding = True
1993 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
1994 extend_props(self._parse_properties())
1995
1996 shallow = self._match_text_seq("SHALLOW")
1997
1998 if self._match_texts(self.CLONE_KEYWORDS):
1999 copy = self._prev.text.lower() == "copy"
2000 clone = self.expression(
2001 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
2002 )
2003
2004 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
2005 return self._parse_as_command(start)
2006
2007 create_kind_text = create_token.text.upper()
2008 return self.expression(
2009 exp.Create,
2010 this=this,
2011 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
2012 replace=replace,
2013 refresh=refresh,
2014 unique=unique,
2015 expression=expression,
2016 exists=exists,
2017 properties=properties,
2018 indexes=indexes,
2019 no_schema_binding=no_schema_binding,
2020 begin=begin,
2021 end=end,
2022 clone=clone,
2023 concurrently=concurrently,
2024 clustered=clustered,
2025 )
2026
2027 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
2028 seq = exp.SequenceProperties()
2029
2030 options = []
2031 index = self._index
2032
2033 while self._curr:
2034 self._match(TokenType.COMMA)
2035 if self._match_text_seq("INCREMENT"):
2036 self._match_text_seq("BY")
2037 self._match_text_seq("=")
2038 seq.set("increment", self._parse_term())
2039 elif self._match_text_seq("MINVALUE"):
2040 seq.set("minvalue", self._parse_term())
2041 elif self._match_text_seq("MAXVALUE"):
2042 seq.set("maxvalue", self._parse_term())
2043 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
2044 self._match_text_seq("=")
2045 seq.set("start", self._parse_term())
2046 elif self._match_text_seq("CACHE"):
2047 # T-SQL allows empty CACHE which is initialized dynamically
2048 seq.set("cache", self._parse_number() or True)
2049 elif self._match_text_seq("OWNED", "BY"):
2050 # "OWNED BY NONE" is the default
2051 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
2052 else:
2053 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
2054 if opt:
2055 options.append(opt)
2056 else:
2057 break
2058
2059 seq.set("options", options if options else None)
2060 return None if self._index == index else seq
2061
2062 def _parse_property_before(self) -> t.Optional[exp.Expression]:
2063 # Only used for Teradata currently
2064 self._match(TokenType.COMMA)
2065
2066 kwargs = {
2067 "no": self._match_text_seq("NO"),
2068 "dual": self._match_text_seq("DUAL"),
2069 "before": self._match_text_seq("BEFORE"),
2070 "default": self._match_text_seq("DEFAULT"),
2071 "local": (self._match_text_seq("LOCAL") and "LOCAL")
2072 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
2073 "after": self._match_text_seq("AFTER"),
2074 "minimum": self._match_texts(("MIN", "MINIMUM")),
2075 "maximum": self._match_texts(("MAX", "MAXIMUM")),
2076 }
2077
2078 if self._match_texts(self.PROPERTY_PARSERS):
2079 parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
2080 try:
2081 return parser(self, **{k: v for k, v in kwargs.items() if v})
2082 except TypeError:
2083 self.raise_error(f"Cannot parse property '{self._prev.text}'")
2084
2085 return None
2086
2087 def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
2088 return self._parse_wrapped_csv(self._parse_property)
2089
2090 def _parse_property(self) -> t.Optional[exp.Expression]:
2091 if self._match_texts(self.PROPERTY_PARSERS):
2092 return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
2093
2094 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
2095 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)
2096
2097 if self._match_text_seq("COMPOUND", "SORTKEY"):
2098 return self._parse_sortkey(compound=True)
2099
2100 if self._match_text_seq("SQL", "SECURITY"):
2101 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
2102
2103 index = self._index
2104 key = self._parse_column()
2105
2106 if not self._match(TokenType.EQ):
2107 self._retreat(index)
2108 return self._parse_sequence_properties()
2109
2110 # Transform the key into an exp.Dot if it consists of dotted identifiers wrapped in an exp.Column, or into an exp.Var otherwise
2111 if isinstance(key, exp.Column):
2112 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2113 2114 value = self._parse_bitwise() or self._parse_var(any_token=True) 2115 2116 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2117 if isinstance(value, exp.Column): 2118 value = exp.var(value.name) 2119 2120 return self.expression(exp.Property, this=key, value=value) 2121 2122 def _parse_stored(self) -> exp.FileFormatProperty: 2123 self._match(TokenType.ALIAS) 2124 2125 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2126 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2127 2128 return self.expression( 2129 exp.FileFormatProperty, 2130 this=( 2131 self.expression( 2132 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2133 ) 2134 if input_format or output_format 2135 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2136 ), 2137 ) 2138 2139 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2140 field = self._parse_field() 2141 if isinstance(field, exp.Identifier) and not field.quoted: 2142 field = exp.var(field) 2143 2144 return field 2145 2146 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2147 self._match(TokenType.EQ) 2148 self._match(TokenType.ALIAS) 2149 2150 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2151 2152 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2153 properties = [] 2154 while True: 2155 if before: 2156 prop = self._parse_property_before() 2157 else: 2158 prop = self._parse_property() 2159 if not prop: 2160 break 2161 for p in ensure_list(prop): 2162 properties.append(p) 2163 2164 if properties: 2165 return self.expression(exp.Properties, expressions=properties) 2166 2167 return None 2168 2169 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2170 return self.expression( 2171 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2172 ) 2173 2174 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2175 if self._match_texts(("DEFINER", "INVOKER")): 2176 security_specifier = self._prev.text.upper() 2177 return self.expression(exp.SecurityProperty, this=security_specifier) 2178 return None 2179 2180 def _parse_settings_property(self) -> exp.SettingsProperty: 2181 return self.expression( 2182 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2183 ) 2184 2185 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2186 if self._index >= 2: 2187 pre_volatile_token = self._tokens[self._index - 2] 2188 else: 2189 pre_volatile_token = None 2190 2191 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2192 return exp.VolatileProperty() 2193 2194 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2195 2196 def _parse_retention_period(self) -> exp.Var: 2197 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2198 number = self._parse_number() 2199 number_str = f"{number} " if number else "" 2200 unit = self._parse_var(any_token=True) 2201 return exp.var(f"{number_str}{unit}") 2202 2203 def _parse_system_versioning_property( 2204 self, with_: bool = False 2205 ) -> exp.WithSystemVersioningProperty: 2206 self._match(TokenType.EQ) 2207 prop = self.expression( 2208 exp.WithSystemVersioningProperty, 2209 **{ # type: ignore 2210 "on": 
True, 2211 "with": with_, 2212 }, 2213 ) 2214 2215 if self._match_text_seq("OFF"): 2216 prop.set("on", False) 2217 return prop 2218 2219 self._match(TokenType.ON) 2220 if self._match(TokenType.L_PAREN): 2221 while self._curr and not self._match(TokenType.R_PAREN): 2222 if self._match_text_seq("HISTORY_TABLE", "="): 2223 prop.set("this", self._parse_table_parts()) 2224 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2225 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2226 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2227 prop.set("retention_period", self._parse_retention_period()) 2228 2229 self._match(TokenType.COMMA) 2230 2231 return prop 2232 2233 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2234 self._match(TokenType.EQ) 2235 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2236 prop = self.expression(exp.DataDeletionProperty, on=on) 2237 2238 if self._match(TokenType.L_PAREN): 2239 while self._curr and not self._match(TokenType.R_PAREN): 2240 if self._match_text_seq("FILTER_COLUMN", "="): 2241 prop.set("filter_column", self._parse_column()) 2242 elif self._match_text_seq("RETENTION_PERIOD", "="): 2243 prop.set("retention_period", self._parse_retention_period()) 2244 2245 self._match(TokenType.COMMA) 2246 2247 return prop 2248 2249 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2250 kind = "HASH" 2251 expressions: t.Optional[t.List[exp.Expression]] = None 2252 if self._match_text_seq("BY", "HASH"): 2253 expressions = self._parse_wrapped_csv(self._parse_id_var) 2254 elif self._match_text_seq("BY", "RANDOM"): 2255 kind = "RANDOM" 2256 2257 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2258 buckets: t.Optional[exp.Expression] = None 2259 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2260 buckets = self._parse_number() 2261 2262 return self.expression( 2263 exp.DistributedByProperty, 2264 expressions=expressions, 2265 kind=kind, 2266 buckets=buckets, 2267 order=self._parse_order(), 2268 ) 2269 2270 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2271 self._match_text_seq("KEY") 2272 expressions = self._parse_wrapped_id_vars() 2273 return self.expression(expr_type, expressions=expressions) 2274 2275 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2276 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2277 prop = self._parse_system_versioning_property(with_=True) 2278 self._match_r_paren() 2279 return prop 2280 2281 if self._match(TokenType.L_PAREN, advance=False): 2282 return self._parse_wrapped_properties() 2283 2284 if self._match_text_seq("JOURNAL"): 2285 return self._parse_withjournaltable() 2286 2287 if self._match_texts(self.VIEW_ATTRIBUTES): 2288 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2289 2290 if self._match_text_seq("DATA"): 2291 return self._parse_withdata(no=False) 2292 elif self._match_text_seq("NO", "DATA"): 2293 return self._parse_withdata(no=True) 2294 2295 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2296 return self._parse_serde_properties(with_=True) 2297 2298 if self._match(TokenType.SCHEMA): 2299 return self.expression( 2300 exp.WithSchemaBindingProperty, 2301 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2302 ) 2303 2304 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2305 return self.expression( 2306 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2307 ) 2308 2309 if not self._next: 2310 return None 2311 2312 return self._parse_withisolatedloading() 2313 2314 def _parse_procedure_option(self) -> exp.Expression | None: 2315 if self._match_text_seq("EXECUTE", "AS"): 2316 return self.expression( 2317 exp.ExecuteAsProperty, 2318 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2319 or self._parse_string(), 2320 ) 2321 2322 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2323 2324 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2325 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2326 self._match(TokenType.EQ) 2327 2328 user = self._parse_id_var() 2329 self._match(TokenType.PARAMETER) 2330 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2331 2332 if not user or not host: 2333 return None 2334 2335 return exp.DefinerProperty(this=f"{user}@{host}") 2336 2337 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2338 self._match(TokenType.TABLE) 2339 self._match(TokenType.EQ) 2340 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2341 2342 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2343 return self.expression(exp.LogProperty, no=no) 2344 2345 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2346 return self.expression(exp.JournalProperty, **kwargs) 2347 2348 def _parse_checksum(self) -> exp.ChecksumProperty: 2349 self._match(TokenType.EQ) 2350 2351 on = None 2352 if self._match(TokenType.ON): 2353 on = True 2354 elif self._match_text_seq("OFF"): 2355 on = False 2356 2357 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2358 2359 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2360 return self.expression( 2361 exp.Cluster, 2362 expressions=( 2363 self._parse_wrapped_csv(self._parse_ordered) 2364 if wrapped 2365 else self._parse_csv(self._parse_ordered) 2366 ), 2367 ) 2368 2369 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2370 self._match_text_seq("BY") 2371 2372 self._match_l_paren() 2373 expressions = self._parse_csv(self._parse_column) 2374 self._match_r_paren() 2375 2376 if self._match_text_seq("SORTED", "BY"): 2377 self._match_l_paren() 2378 sorted_by = self._parse_csv(self._parse_ordered) 2379 self._match_r_paren() 2380 else: 2381 sorted_by = None 2382 2383 self._match(TokenType.INTO) 2384 buckets = self._parse_number() 2385 self._match_text_seq("BUCKETS") 2386 2387 return self.expression( 2388 exp.ClusteredByProperty, 2389 expressions=expressions, 2390 sorted_by=sorted_by, 2391 buckets=buckets, 2392 ) 2393 2394 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2395 if not self._match_text_seq("GRANTS"): 2396 self._retreat(self._index - 1) 2397 return None 2398 2399 return self.expression(exp.CopyGrantsProperty) 2400 2401 def _parse_freespace(self) -> exp.FreespaceProperty: 2402 self._match(TokenType.EQ) 2403 return self.expression( 2404 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2405 ) 2406 2407 def _parse_mergeblockratio( 2408 self, no: bool = False, default: bool = False 2409 ) -> exp.MergeBlockRatioProperty: 2410 if self._match(TokenType.EQ): 2411 return self.expression( 2412 exp.MergeBlockRatioProperty, 2413 this=self._parse_number(), 2414 percent=self._match(TokenType.PERCENT), 2415 ) 2416 2417 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2418 2419 def _parse_datablocksize( 2420 self, 2421 default: t.Optional[bool] = None, 2422 minimum: t.Optional[bool] = None, 2423 maximum: t.Optional[bool] = None, 2424 ) -> exp.DataBlocksizeProperty: 2425 self._match(TokenType.EQ) 2426 size = self._parse_number() 2427 2428 units = None 2429 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2430 units = self._prev.text 2431 2432 return self.expression( 2433 exp.DataBlocksizeProperty, 2434 size=size, 2435 units=units, 2436 default=default, 2437 minimum=minimum, 2438 maximum=maximum, 2439 ) 2440 2441 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2442 self._match(TokenType.EQ) 2443 always = self._match_text_seq("ALWAYS") 2444 manual = self._match_text_seq("MANUAL") 2445 never = self._match_text_seq("NEVER") 2446 default = self._match_text_seq("DEFAULT") 2447 2448 autotemp = None 2449 if self._match_text_seq("AUTOTEMP"): 2450 autotemp = self._parse_schema() 2451 2452 return self.expression( 2453 exp.BlockCompressionProperty, 2454 always=always, 2455 manual=manual, 2456 never=never, 2457 default=default, 2458 autotemp=autotemp, 2459 ) 2460 2461 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2462 index = self._index 2463 no = self._match_text_seq("NO") 2464 concurrent = self._match_text_seq("CONCURRENT") 2465 2466 if not self._match_text_seq("ISOLATED", "LOADING"): 2467 self._retreat(index) 2468 return None 2469 2470 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2471 return self.expression( 2472 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2473 ) 2474 2475 def _parse_locking(self) -> exp.LockingProperty: 2476 if self._match(TokenType.TABLE): 2477 kind = "TABLE" 2478 elif self._match(TokenType.VIEW): 2479 kind = "VIEW" 2480 elif self._match(TokenType.ROW): 2481 kind = "ROW" 2482 elif self._match_text_seq("DATABASE"): 2483 kind = "DATABASE" 2484 else: 2485 kind = None 2486 2487 if kind in ("DATABASE", "TABLE", "VIEW"): 2488 this = self._parse_table_parts() 2489 else: 2490 this = None 2491 2492 if self._match(TokenType.FOR): 2493 for_or_in = "FOR" 2494 elif self._match(TokenType.IN): 2495 for_or_in = "IN" 2496 else: 2497 for_or_in = None 2498 2499 if self._match_text_seq("ACCESS"): 2500 lock_type = "ACCESS" 2501 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2502 lock_type = "EXCLUSIVE" 2503 elif self._match_text_seq("SHARE"): 2504 lock_type = "SHARE" 2505 elif self._match_text_seq("READ"): 2506 lock_type = "READ" 2507 elif self._match_text_seq("WRITE"): 2508 lock_type = "WRITE" 2509 elif self._match_text_seq("CHECKSUM"): 2510 lock_type = "CHECKSUM" 2511 else: 2512 lock_type = None 2513 2514 override = self._match_text_seq("OVERRIDE") 2515 2516 return self.expression( 2517 exp.LockingProperty, 2518 this=this, 2519 kind=kind, 2520 for_or_in=for_or_in, 2521 lock_type=lock_type, 2522 override=override, 2523 ) 2524 2525 def _parse_partition_by(self) -> t.List[exp.Expression]: 2526 if self._match(TokenType.PARTITION_BY): 2527 return self._parse_csv(self._parse_assignment) 2528 return [] 2529 2530 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2531 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2532 if self._match_text_seq("MINVALUE"): 2533 return exp.var("MINVALUE") 2534 if self._match_text_seq("MAXVALUE"): 2535 return exp.var("MAXVALUE") 2536 return self._parse_bitwise() 2537 2538 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2539 expression = None 
2540 from_expressions = None 2541 to_expressions = None 2542 2543 if self._match(TokenType.IN): 2544 this = self._parse_wrapped_csv(self._parse_bitwise) 2545 elif self._match(TokenType.FROM): 2546 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2547 self._match_text_seq("TO") 2548 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2549 elif self._match_text_seq("WITH", "(", "MODULUS"): 2550 this = self._parse_number() 2551 self._match_text_seq(",", "REMAINDER") 2552 expression = self._parse_number() 2553 self._match_r_paren() 2554 else: 2555 self.raise_error("Failed to parse partition bound spec.") 2556 2557 return self.expression( 2558 exp.PartitionBoundSpec, 2559 this=this, 2560 expression=expression, 2561 from_expressions=from_expressions, 2562 to_expressions=to_expressions, 2563 ) 2564 2565 # https://www.postgresql.org/docs/current/sql-createtable.html 2566 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2567 if not self._match_text_seq("OF"): 2568 self._retreat(self._index - 1) 2569 return None 2570 2571 this = self._parse_table(schema=True) 2572 2573 if self._match(TokenType.DEFAULT): 2574 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2575 elif self._match_text_seq("FOR", "VALUES"): 2576 expression = self._parse_partition_bound_spec() 2577 else: 2578 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2579 2580 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2581 2582 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2583 self._match(TokenType.EQ) 2584 return self.expression( 2585 exp.PartitionedByProperty, 2586 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2587 ) 2588 2589 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2590 if self._match_text_seq("AND", "STATISTICS"): 2591 statistics = True 2592 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2593 statistics = False 2594 else: 2595 statistics = None 2596 2597 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2598 2599 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2600 if self._match_text_seq("SQL"): 2601 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2602 return None 2603 2604 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2605 if self._match_text_seq("SQL", "DATA"): 2606 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2607 return None 2608 2609 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2610 if self._match_text_seq("PRIMARY", "INDEX"): 2611 return exp.NoPrimaryIndexProperty() 2612 if self._match_text_seq("SQL"): 2613 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2614 return None 2615 2616 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2617 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2618 return exp.OnCommitProperty() 2619 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2620 return exp.OnCommitProperty(delete=True) 2621 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2622 2623 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2624 if self._match_text_seq("SQL", "DATA"): 2625 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2626 return None 2627 2628 def _parse_distkey(self) -> exp.DistKeyProperty: 2629 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2630 2631 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2632 table = self._parse_table(schema=True) 2633 2634 options = [] 2635 while self._match_texts(("INCLUDING", "EXCLUDING")): 2636 this = self._prev.text.upper() 2637 2638 id_var = self._parse_id_var() 2639 if not id_var: 2640 return None 2641 2642 options.append( 2643 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2644 ) 2645 2646 return self.expression(exp.LikeProperty, this=table, expressions=options) 2647 2648 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2649 return self.expression( 2650 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2651 ) 2652 2653 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2654 self._match(TokenType.EQ) 2655 return self.expression( 2656 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2657 ) 2658 2659 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2660 self._match_text_seq("WITH", "CONNECTION") 2661 return self.expression( 2662 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2663 ) 2664 2665 def _parse_returns(self) -> exp.ReturnsProperty: 2666 value: t.Optional[exp.Expression] 2667 null = None 2668 is_table = self._match(TokenType.TABLE) 2669 2670 if is_table: 2671 if self._match(TokenType.LT): 2672 value = self.expression( 2673 exp.Schema, 2674 this="TABLE", 2675 expressions=self._parse_csv(self._parse_struct_types), 2676 ) 2677 if not self._match(TokenType.GT): 2678 self.raise_error("Expecting >") 2679 else: 2680 value = self._parse_schema(exp.var("TABLE")) 2681 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2682 null = True 2683 value = None 2684 else: 2685 value = self._parse_types() 2686 2687 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2688 2689 def _parse_describe(self) -> exp.Describe: 2690 kind = self._match_set(self.CREATABLES) and self._prev.text 2691 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2692 if self._match(TokenType.DOT): 2693 style = None 2694 self._retreat(self._index - 2) 2695 2696 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2697 2698 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2699 this = self._parse_statement() 2700 else: 2701 this = self._parse_table(schema=True) 2702 2703 properties = self._parse_properties() 2704 expressions = properties.expressions if properties else None 2705 partition = self._parse_partition() 2706 return self.expression( 2707 exp.Describe, 2708 this=this, 2709 style=style, 2710 kind=kind, 2711 expressions=expressions, 2712 partition=partition, 2713 format=format, 2714 ) 2715 2716 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2717 kind = self._prev.text.upper() 2718 expressions = [] 2719 2720 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2721 if self._match(TokenType.WHEN): 2722 expression = self._parse_disjunction() 2723 self._match(TokenType.THEN) 2724 else: 2725 expression = None 2726 2727 else_ = self._match(TokenType.ELSE) 2728 2729 if not self._match(TokenType.INTO): 2730 return None 2731 2732 return self.expression( 2733 exp.ConditionalInsert, 2734 this=self.expression( 2735 exp.Insert, 2736 this=self._parse_table(schema=True), 2737 
expression=self._parse_derived_table_values(), 2738 ), 2739 expression=expression, 2740 else_=else_, 2741 ) 2742 2743 expression = parse_conditional_insert() 2744 while expression is not None: 2745 expressions.append(expression) 2746 expression = parse_conditional_insert() 2747 2748 return self.expression( 2749 exp.MultitableInserts, 2750 kind=kind, 2751 comments=comments, 2752 expressions=expressions, 2753 source=self._parse_table(), 2754 ) 2755 2756 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2757 comments = [] 2758 hint = self._parse_hint() 2759 overwrite = self._match(TokenType.OVERWRITE) 2760 ignore = self._match(TokenType.IGNORE) 2761 local = self._match_text_seq("LOCAL") 2762 alternative = None 2763 is_function = None 2764 2765 if self._match_text_seq("DIRECTORY"): 2766 this: t.Optional[exp.Expression] = self.expression( 2767 exp.Directory, 2768 this=self._parse_var_or_string(), 2769 local=local, 2770 row_format=self._parse_row_format(match_row=True), 2771 ) 2772 else: 2773 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2774 comments += ensure_list(self._prev_comments) 2775 return self._parse_multitable_inserts(comments) 2776 2777 if self._match(TokenType.OR): 2778 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2779 2780 self._match(TokenType.INTO) 2781 comments += ensure_list(self._prev_comments) 2782 self._match(TokenType.TABLE) 2783 is_function = self._match(TokenType.FUNCTION) 2784 2785 this = ( 2786 self._parse_table(schema=True, parse_partition=True) 2787 if not is_function 2788 else self._parse_function() 2789 ) 2790 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2791 this.set("alias", self._parse_table_alias()) 2792 2793 returning = self._parse_returning() 2794 2795 return self.expression( 2796 exp.Insert, 2797 comments=comments, 2798 hint=hint, 2799 is_function=is_function, 2800 this=this, 2801 stored=self._match_text_seq("STORED") and self._parse_stored(), 2802 by_name=self._match_text_seq("BY", "NAME"), 2803 exists=self._parse_exists(), 2804 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2805 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2806 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2807 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2808 conflict=self._parse_on_conflict(), 2809 returning=returning or self._parse_returning(), 2810 overwrite=overwrite, 2811 alternative=alternative, 2812 ignore=ignore, 2813 source=self._match(TokenType.TABLE) and self._parse_table(), 2814 ) 2815 2816 def _parse_kill(self) -> exp.Kill: 2817 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2818 2819 return self.expression( 2820 exp.Kill, 2821 this=self._parse_primary(), 2822 kind=kind, 2823 ) 2824 2825 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2826 conflict = self._match_text_seq("ON", "CONFLICT") 2827 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2828 2829 if not conflict and not duplicate: 2830 return None 2831 2832 conflict_keys = None 2833 constraint = None 2834 2835 if conflict: 2836 if self._match_text_seq("ON", "CONSTRAINT"): 2837 constraint = self._parse_id_var() 2838 elif self._match(TokenType.L_PAREN): 2839 conflict_keys = self._parse_csv(self._parse_id_var) 2840 self._match_r_paren() 2841 2842 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2843 if 
self._prev.token_type == TokenType.UPDATE: 2844 self._match(TokenType.SET) 2845 expressions = self._parse_csv(self._parse_equality) 2846 else: 2847 expressions = None 2848 2849 return self.expression( 2850 exp.OnConflict, 2851 duplicate=duplicate, 2852 expressions=expressions, 2853 action=action, 2854 conflict_keys=conflict_keys, 2855 constraint=constraint, 2856 where=self._parse_where(), 2857 ) 2858 2859 def _parse_returning(self) -> t.Optional[exp.Returning]: 2860 if not self._match(TokenType.RETURNING): 2861 return None 2862 return self.expression( 2863 exp.Returning, 2864 expressions=self._parse_csv(self._parse_expression), 2865 into=self._match(TokenType.INTO) and self._parse_table_part(), 2866 ) 2867 2868 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2869 if not self._match(TokenType.FORMAT): 2870 return None 2871 return self._parse_row_format() 2872 2873 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2874 index = self._index 2875 with_ = with_ or self._match_text_seq("WITH") 2876 2877 if not self._match(TokenType.SERDE_PROPERTIES): 2878 self._retreat(index) 2879 return None 2880 return self.expression( 2881 exp.SerdeProperties, 2882 **{ # type: ignore 2883 "expressions": self._parse_wrapped_properties(), 2884 "with": with_, 2885 }, 2886 ) 2887 2888 def _parse_row_format( 2889 self, match_row: bool = False 2890 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2891 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2892 return None 2893 2894 if self._match_text_seq("SERDE"): 2895 this = self._parse_string() 2896 2897 serde_properties = self._parse_serde_properties() 2898 2899 return self.expression( 2900 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2901 ) 2902 2903 self._match_text_seq("DELIMITED") 2904 2905 kwargs = {} 2906 2907 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2908 kwargs["fields"] = self._parse_string() 2909 if self._match_text_seq("ESCAPED", "BY"): 2910 kwargs["escaped"] = self._parse_string() 2911 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2912 kwargs["collection_items"] = self._parse_string() 2913 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2914 kwargs["map_keys"] = self._parse_string() 2915 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2916 kwargs["lines"] = self._parse_string() 2917 if self._match_text_seq("NULL", "DEFINED", "AS"): 2918 kwargs["null"] = self._parse_string() 2919 2920 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2921 2922 def _parse_load(self) -> exp.LoadData | exp.Command: 2923 if self._match_text_seq("DATA"): 2924 local = self._match_text_seq("LOCAL") 2925 self._match_text_seq("INPATH") 2926 inpath = self._parse_string() 2927 overwrite = self._match(TokenType.OVERWRITE) 2928 self._match_pair(TokenType.INTO, TokenType.TABLE) 2929 2930 return self.expression( 2931 exp.LoadData, 2932 this=self._parse_table(schema=True), 2933 local=local, 2934 overwrite=overwrite, 2935 inpath=inpath, 2936 partition=self._parse_partition(), 2937 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2938 serde=self._match_text_seq("SERDE") and self._parse_string(), 2939 ) 2940 return self._parse_as_command(self._prev) 2941 2942 def _parse_delete(self) -> exp.Delete: 2943 # This handles MySQL's "Multiple-Table Syntax" 2944 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2945 tables 
= None 2946 if not self._match(TokenType.FROM, advance=False): 2947 tables = self._parse_csv(self._parse_table) or None 2948 2949 returning = self._parse_returning() 2950 2951 return self.expression( 2952 exp.Delete, 2953 tables=tables, 2954 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2955 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2956 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2957 where=self._parse_where(), 2958 returning=returning or self._parse_returning(), 2959 limit=self._parse_limit(), 2960 ) 2961 2962 def _parse_update(self) -> exp.Update: 2963 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2964 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2965 returning = self._parse_returning() 2966 return self.expression( 2967 exp.Update, 2968 **{ # type: ignore 2969 "this": this, 2970 "expressions": expressions, 2971 "from": self._parse_from(joins=True), 2972 "where": self._parse_where(), 2973 "returning": returning or self._parse_returning(), 2974 "order": self._parse_order(), 2975 "limit": self._parse_limit(), 2976 }, 2977 ) 2978 2979 def _parse_use(self) -> exp.Use: 2980 return self.expression( 2981 exp.Use, 2982 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 2983 this=self._parse_table(schema=False), 2984 ) 2985 2986 def _parse_uncache(self) -> exp.Uncache: 2987 if not self._match(TokenType.TABLE): 2988 self.raise_error("Expecting TABLE after UNCACHE") 2989 2990 return self.expression( 2991 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2992 ) 2993 2994 def _parse_cache(self) -> exp.Cache: 2995 lazy = self._match_text_seq("LAZY") 2996 self._match(TokenType.TABLE) 2997 table = self._parse_table(schema=True) 2998 2999 options = [] 3000 if self._match_text_seq("OPTIONS"): 3001 self._match_l_paren() 3002 k = self._parse_string() 3003 self._match(TokenType.EQ) 3004 v = self._parse_string() 3005 options = [k, v] 3006 self._match_r_paren() 3007 3008 self._match(TokenType.ALIAS) 3009 return self.expression( 3010 exp.Cache, 3011 this=table, 3012 lazy=lazy, 3013 options=options, 3014 expression=self._parse_select(nested=True), 3015 ) 3016 3017 def _parse_partition(self) -> t.Optional[exp.Partition]: 3018 if not self._match_texts(self.PARTITION_KEYWORDS): 3019 return None 3020 3021 return self.expression( 3022 exp.Partition, 3023 subpartition=self._prev.text.upper() == "SUBPARTITION", 3024 expressions=self._parse_wrapped_csv(self._parse_assignment), 3025 ) 3026 3027 def _parse_value(self) -> t.Optional[exp.Tuple]: 3028 def _parse_value_expression() -> t.Optional[exp.Expression]: 3029 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3030 return exp.var(self._prev.text.upper()) 3031 return self._parse_expression() 3032 3033 if self._match(TokenType.L_PAREN): 3034 expressions = self._parse_csv(_parse_value_expression) 3035 self._match_r_paren() 3036 return self.expression(exp.Tuple, expressions=expressions) 3037 3038 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
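# (Editor's illustration: in such dialects `VALUES 1, 2` is parsed as the rows
# (1) and (2), i.e. two single-column tuples rather than one two-column tuple.)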
3039 expression = self._parse_expression()
3040 if expression:
3041 return self.expression(exp.Tuple, expressions=[expression])
3042 return None
3043
3044 def _parse_projections(self) -> t.List[exp.Expression]:
3045 return self._parse_expressions()
3046
3047 def _parse_select(
3048 self,
3049 nested: bool = False,
3050 table: bool = False,
3051 parse_subquery_alias: bool = True,
3052 parse_set_operation: bool = True,
3053 ) -> t.Optional[exp.Expression]:
3054 cte = self._parse_with()
3055
3056 if cte:
3057 this = self._parse_statement()
3058
3059 if not this:
3060 self.raise_error("Failed to parse any statement following CTE")
3061 return cte
3062
3063 if "with" in this.arg_types:
3064 this.set("with", cte)
3065 else:
3066 self.raise_error(f"{this.key} does not support CTE")
3067 this = cte
3068
3069 return this
3070
3071 # DuckDB supports a leading FROM clause, e.g. FROM x
3072 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None
3073
3074 if self._match(TokenType.SELECT):
3075 comments = self._prev_comments
3076
3077 hint = self._parse_hint()
3078
3079 if self._next and not self._next.token_type == TokenType.DOT:
3080 all_ = self._match(TokenType.ALL)
3081 distinct = self._match_set(self.DISTINCT_TOKENS)
3082 else:
3083 all_, distinct = None, None
3084
3085 kind = (
3086 self._match(TokenType.ALIAS)
3087 and self._match_texts(("STRUCT", "VALUE"))
3088 and self._prev.text.upper()
3089 )
3090
3091 if distinct:
3092 distinct = self.expression(
3093 exp.Distinct,
3094 on=self._parse_value() if self._match(TokenType.ON) else None,
3095 )
3096
3097 if all_ and distinct:
3098 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
3099
3100 operation_modifiers = []
3101 while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
3102 operation_modifiers.append(exp.var(self._prev.text.upper()))
3103
3104 limit = self._parse_limit(top=True)
3105 projections = self._parse_projections()
3106
3107 this = self.expression(
3108 exp.Select,
3109 kind=kind,
3110 hint=hint,
3111 distinct=distinct,
3112 expressions=projections,
3113 limit=limit,
3114 operation_modifiers=operation_modifiers or None,
3115 )
3116 this.comments = comments
3117
3118 into = self._parse_into()
3119 if into:
3120 this.set("into", into)
3121
3122 if not from_:
3123 from_ = self._parse_from()
3124
3125 if from_:
3126 this.set("from", from_)
3127
3128 this = self._parse_query_modifiers(this)
3129 elif (table or nested) and self._match(TokenType.L_PAREN):
3130 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
3131 this = self._parse_simplified_pivot(
3132 is_unpivot=self._prev.token_type == TokenType.UNPIVOT
3133 )
3134 elif self._match(TokenType.FROM):
3135 from_ = self._parse_from(skip_from_token=True)
3136 # Support parentheses for DuckDB's FROM-first syntax
3137 select = self._parse_select()
3138 if select:
3139 select.set("from", from_)
3140 this = select
3141 else:
3142 this = exp.select("*").from_(t.cast(exp.From, from_))
3143 else:
3144 this = (
3145 self._parse_table()
3146 if table
3147 else self._parse_select(nested=True, parse_set_operation=False)
3148 )
3149
3150 # Transform exp.Values into an exp.Table so it passes through parse_query_modifiers,
3151 # in case a modifier (e.g. a join) follows
3152 if table and isinstance(this, exp.Values) and this.alias:
3153 alias = this.args["alias"].pop()
3154 this = exp.Table(this=this, alias=alias)
3155
3156 this = self._parse_query_modifiers(self._parse_set_operations(this))
3157
3158 self._match_r_paren()
3159
3160 # We return early here so that the UNION isn't attached to the subquery by the
3161 # following call to _parse_set_operations, but instead becomes the parent node
3162 return self._parse_subquery(this, parse_alias=parse_subquery_alias)
3163 elif self._match(TokenType.VALUES, advance=False):
3164 this = self._parse_derived_table_values()
3165 elif from_:
3166 this = exp.select("*").from_(from_.this, copy=False)
3167 elif self._match(TokenType.SUMMARIZE):
3168 table = self._match(TokenType.TABLE)
3169 this = self._parse_select() or self._parse_string() or self._parse_table()
3170 return self.expression(exp.Summarize, this=this, table=table)
3171 elif self._match(TokenType.DESCRIBE):
3172 this = self._parse_describe()
3173 elif self._match_text_seq("STREAM"):
3174 this = self._parse_function()
3175 if this:
3176 this = self.expression(exp.Stream, this=this)
3177 else:
3178 self._retreat(self._index - 1)
3179 else:
3180 this = None
3181
3182 return self._parse_set_operations(this) if parse_set_operation else this
3183
3184 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
3185 self._match_text_seq("SEARCH")
3186
3187 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()
3188
3189 if not kind:
3190 return None
3191
3192 self._match_text_seq("FIRST", "BY")
3193
3194 return self.expression(
3195 exp.RecursiveWithSearch,
3196 kind=kind,
3197 this=self._parse_id_var(),
3198 expression=self._match_text_seq("SET") and self._parse_id_var(),
3199 using=self._match_text_seq("USING") and self._parse_id_var(),
3200 )
3201
3202 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
3203 if not skip_with_token and not self._match(TokenType.WITH):
3204 return None
3205
3206 comments = self._prev_comments
3207 recursive = self._match(TokenType.RECURSIVE)
3208
3209 last_comments = None
3210 expressions = []
3211 while True:
3212 cte = self._parse_cte()
3213 if isinstance(cte, exp.CTE):
3214 expressions.append(cte)
3215 if last_comments:
3216 cte.add_comments(last_comments)
3217
3218 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
3219 break
3220 else:
3221 self._match(TokenType.WITH)
3222
3223 last_comments = self._prev_comments
3224
3225 return self.expression(
3226 exp.With,
3227 comments=comments,
3228 expressions=expressions,
3229 recursive=recursive,
3230 search=self._parse_recursive_with_search(),
3231 )
3232
3233 def _parse_cte(self) -> t.Optional[exp.CTE]:
3234 index = self._index
3235
3236 alias = self._parse_table_alias(self.ID_VAR_TOKENS)
3237 if not alias or not alias.this:
3238 self.raise_error("Expected CTE to have alias")
3239
3240 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
3241 self._retreat(index)
3242 return None
3243
3244 comments = self._prev_comments
3245
3246 if self._match_text_seq("NOT", "MATERIALIZED"):
3247 materialized = False
3248 elif self._match_text_seq("MATERIALIZED"):
3249 materialized = True
3250 else:
3251 materialized = None
3252
3253 cte = self.expression(
3254 exp.CTE,
3255 this=self._parse_wrapped(self._parse_statement),
3256 alias=alias,
3257 materialized=materialized,
3258 comments=comments,
3259 )
3260
3261 if isinstance(cte.this, exp.Values):
3262 
cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3263 3264 return cte 3265 3266 def _parse_table_alias( 3267 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3268 ) -> t.Optional[exp.TableAlias]: 3269 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3270 # so this section tries to parse the clause version and if it fails, it treats the token 3271 # as an identifier (alias) 3272 if self._can_parse_limit_or_offset(): 3273 return None 3274 3275 any_token = self._match(TokenType.ALIAS) 3276 alias = ( 3277 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3278 or self._parse_string_as_identifier() 3279 ) 3280 3281 index = self._index 3282 if self._match(TokenType.L_PAREN): 3283 columns = self._parse_csv(self._parse_function_parameter) 3284 self._match_r_paren() if columns else self._retreat(index) 3285 else: 3286 columns = None 3287 3288 if not alias and not columns: 3289 return None 3290 3291 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3292 3293 # We bubble up comments from the Identifier to the TableAlias 3294 if isinstance(alias, exp.Identifier): 3295 table_alias.add_comments(alias.pop_comments()) 3296 3297 return table_alias 3298 3299 def _parse_subquery( 3300 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3301 ) -> t.Optional[exp.Subquery]: 3302 if not this: 3303 return None 3304 3305 return self.expression( 3306 exp.Subquery, 3307 this=this, 3308 pivots=self._parse_pivots(), 3309 alias=self._parse_table_alias() if parse_alias else None, 3310 sample=self._parse_table_sample(), 3311 ) 3312 3313 def _implicit_unnests_to_explicit(self, this: E) -> E: 3314 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3315 3316 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3317 for i, join in enumerate(this.args.get("joins") or []): 3318 table = join.this 3319 normalized_table = table.copy() 3320 normalized_table.meta["maybe_column"] = True 3321 normalized_table = _norm(normalized_table, dialect=self.dialect) 3322 3323 if isinstance(table, exp.Table) and not join.args.get("on"): 3324 if normalized_table.parts[0].name in refs: 3325 table_as_column = table.to_column() 3326 unnest = exp.Unnest(expressions=[table_as_column]) 3327 3328 # Table.to_column creates a parent Alias node that we want to convert to 3329 # a TableAlias and attach to the Unnest, so it matches the parser's output 3330 if isinstance(table.args.get("alias"), exp.TableAlias): 3331 table_as_column.replace(table_as_column.this) 3332 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3333 3334 table.replace(unnest) 3335 3336 refs.add(normalized_table.alias_or_name) 3337 3338 return this 3339 3340 def _parse_query_modifiers( 3341 self, this: t.Optional[exp.Expression] 3342 ) -> t.Optional[exp.Expression]: 3343 if isinstance(this, (exp.Query, exp.Table)): 3344 for join in self._parse_joins(): 3345 this.append("joins", join) 3346 for lateral in iter(self._parse_lateral, None): 3347 this.append("laterals", lateral) 3348 3349 while True: 3350 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3351 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3352 key, expression = parser(self) 3353 3354 if expression: 3355 this.set(key, expression) 3356 if key == "limit": 3357 offset = expression.args.pop("offset", None) 3358 3359 if offset: 3360 offset = 
exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
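
    # Illustrative sketch, not part of the parser: the join method/side/kind tokens
    # become plain string args on exp.Join, and USING columns land in its "using" arg:
    #
    #   import sqlglot
    #
    #   join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b USING (id)").args["joins"][0]
    #   assert join.args.get("side") == "LEFT" and join.args.get("using")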

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
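
    # Illustrative sketch, not part of the parser: assuming the MySQL dialect, an
    # index hint such as USE INDEX should surface as an exp.IndexTableHint entry in
    # the table's "hints" arg:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT * FROM t USE INDEX (i1)", read="mysql")
    #   assert ast.find(exp.IndexTableHint) is not None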

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
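
    # Illustrative sketch, not part of the parser: a dotted reference is unpacked
    # right-to-left into the table, db and catalog parts of the exp.Table node:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   table = sqlglot.parse_one("SELECT * FROM cat.db.tbl").find(exp.Table)
    #   assert (table.catalog, table.db, table.name) == ("cat", "db", "tbl")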

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
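
    # Illustrative sketch, not part of the parser: assuming the BigQuery dialect,
    # WITH OFFSET is folded into the exp.Unnest node's "offset" arg (defaulting the
    # identifier to "offset" when no alias is given):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET", read="bigquery")
    #   unnest = ast.find(exp.Unnest)
    #   assert unnest is not None and unnest.args.get("offset") is not None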

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
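
    # Illustrative sketch, not part of the parser: assuming the dialect tokenizes
    # TABLESAMPLE and PERCENT, a percentage sample lands in the exp.TableSample
    # node's "percent" arg:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sample = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)").find(exp.TableSample)
    #   assert sample is not None and sample.args.get("percent") is not None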

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            pivot_field_expressions = pivot.args["field"].expressions

            # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
            if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                for fld in pivot_field_expressions:
                    field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                    for name in names:
                        if self.PREFIXED_PIVOT_COLUMNS:
                            name = f"{name}_{field_name}" if name else field_name
                        else:
                            name = f"{field_name}_{name}" if name else field_name

                        columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())
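
    # Illustrative sketch, not part of the parser: GROUP BY gathers plain keys,
    # ROLLUP/CUBE and GROUPING SETS into a single exp.Group node, and HAVING follows
    # as its own modifier:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a) HAVING SUM(b) > 0")
    #   assert ast.args["group"].args.get("rollup")
    #   assert isinstance(ast.args["having"], exp.Having)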

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )
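
    # Illustrative sketch, not part of the parser: each ORDER BY key becomes an
    # exp.Ordered node whose "desc"/"nulls_first" args reflect the explicit modifiers
    # plus the dialect's NULL_ORDERING default:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ordered = sqlglot.parse_one("SELECT * FROM t ORDER BY x DESC NULLS LAST").find(exp.Ordered)
    #   assert ordered.args.get("desc") is True and not ordered.args.get("nulls_first")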

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )
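
    # Illustrative sketch, not part of the parser: LIMIT and OFFSET attach to the
    # Select as separate modifier args rather than wrapping the query:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT * FROM t LIMIT 10 OFFSET 5")
    #   assert isinstance(ast.args["limit"], exp.Limit)
    #   assert isinstance(ast.args["offset"], exp.Offset)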

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
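
    # Illustrative sketch, not part of the parser: the one-liners above form a
    # recursive-descent precedence ladder, so AND binds tighter than OR:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   cond = sqlglot.parse_one("a OR b AND c")
    #   assert isinstance(cond, exp.Or) and isinstance(cond.expression, exp.And)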

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
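
    # Illustrative sketch, not part of the parser: BETWEEN builds a ternary
    # exp.Between node with "low" and "high" args:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   between = sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 10").find(exp.Between)
    #   assert between.args["low"].this == "1" and between.args["high"].this == "10"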

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
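
    # Illustrative sketch, not part of the parser: INTERVAL '5 days' is canonicalized
    # into a '5' literal plus an uppercased unit so each dialect can re-spell it:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   interval = sqlglot.parse_one("SELECT INTERVAL '5 days'").find(exp.Interval)
    #   assert interval.this.this == "5" and interval.args["unit"].name == "DAYS"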

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
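
    # Illustrative sketch, not part of the parser: `_parse_types` also backs
    # exp.DataType.build, so parameterized and nested types go through this method:
    #
    #   from sqlglot import exp
    #
    #   dtype = exp.DataType.build("ARRAY<DECIMAL(38, 0)>")
    #   assert dtype.this == exp.DataType.Type.ARRAY
    #   assert dtype.expressions[0].this == exp.DataType.Type.DECIMAL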
Without this, the list will be parsed as a type and we'll eventually crash 5209 this = self._parse_id_var() 5210 else: 5211 this = ( 5212 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5213 or self._parse_id_var() 5214 ) 5215 5216 self._match(TokenType.COLON) 5217 5218 if ( 5219 type_required 5220 and not isinstance(this, exp.DataType) 5221 and not self._match_set(self.TYPE_TOKENS, advance=False) 5222 ): 5223 self._retreat(index) 5224 return self._parse_types() 5225 5226 return self._parse_column_def(this) 5227 5228 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5229 if not self._match_text_seq("AT", "TIME", "ZONE"): 5230 return this 5231 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5232 5233 def _parse_column(self) -> t.Optional[exp.Expression]: 5234 this = self._parse_column_reference() 5235 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5236 5237 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5238 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5239 5240 return column 5241 5242 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5243 this = self._parse_field() 5244 if ( 5245 not this 5246 and self._match(TokenType.VALUES, advance=False) 5247 and self.VALUES_FOLLOWED_BY_PAREN 5248 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5249 ): 5250 this = self._parse_id_var() 5251 5252 if isinstance(this, exp.Identifier): 5253 # We bubble up comments from the Identifier to the Column 5254 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5255 5256 return this 5257 5258 def _parse_colon_as_variant_extract( 5259 self, this: t.Optional[exp.Expression] 5260 ) -> t.Optional[exp.Expression]: 5261 casts = [] 5262 json_path = [] 5263 escape = None 5264 5265 while self._match(TokenType.COLON): 5266 start_index = self._index 5267 5268 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5269 path = self._parse_column_ops( 5270 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5271 ) 5272 5273 # The cast :: operator has a lower precedence than the extraction operator :, so 5274 # we rearrange the AST appropriately to avoid casting the JSON path 5275 while isinstance(path, exp.Cast): 5276 casts.append(path.to) 5277 path = path.this 5278 5279 if casts: 5280 dcolon_offset = next( 5281 i 5282 for i, t in enumerate(self._tokens[start_index:]) 5283 if t.token_type == TokenType.DCOLON 5284 ) 5285 end_token = self._tokens[start_index + dcolon_offset - 1] 5286 else: 5287 end_token = self._prev 5288 5289 if path: 5290 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5291 # it'll roundtrip to a string literal in GET_PATH 5292 if isinstance(path, exp.Identifier) and path.quoted: 5293 escape = True 5294 5295 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5296 5297 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5298 # Databricks transforms it back to the colon/dot notation 5299 if json_path: 5300 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5301 5302 if json_path_expr: 5303 json_path_expr.set("escape", escape) 5304 5305 this = self.expression( 5306 exp.JSONExtract, 5307 this=this, 5308 expression=json_path_expr, 5309 variant_extract=True, 5310 ) 5311 5312 while casts: 5313 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5314 5315 return this 5316 5317 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5318 return self._parse_types() 5319 5320 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5321 this = self._parse_bracket(this) 5322 5323 while self._match_set(self.COLUMN_OPERATORS): 5324 op_token = self._prev.token_type 5325 op = self.COLUMN_OPERATORS.get(op_token) 5326 5327 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5328 field = self._parse_dcolon() 5329 if not field: 5330 self.raise_error("Expected type") 5331 elif op and self._curr: 5332 field = self._parse_column_reference() or self._parse_bracket() 5333 else: 5334 field = self._parse_field(any_token=True, anonymous_func=True) 5335 5336 if isinstance(field, (exp.Func, exp.Window)) and this: 5337 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5338 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5339 this = exp.replace_tree( 5340 this, 5341 lambda n: ( 5342 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5343 if n.table 5344 else n.this 5345 ) 5346 if isinstance(n, exp.Column) 5347 else n, 5348 ) 5349 5350 if op: 5351 this = op(self, this, field) 5352 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5353 this = self.expression( 5354 exp.Column, 5355 comments=this.comments, 5356 this=field, 5357 table=this.this, 5358 db=this.args.get("table"), 5359 catalog=this.args.get("db"), 5360 ) 5361 elif isinstance(field, exp.Window): 5362 # Move the exp.Dot's to the window's function 5363 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5364 field.set("this", window_func) 5365 this = field 5366 else: 5367 this = self.expression(exp.Dot, this=this, expression=field) 5368 5369 if field and field.comments: 5370 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5371 5372 this = self._parse_bracket(this) 5373 5374 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5375 5376 def _parse_primary(self) -> t.Optional[exp.Expression]: 5377 if self._match_set(self.PRIMARY_PARSERS): 5378 token_type = self._prev.token_type 5379 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5380 5381 if token_type == TokenType.STRING: 5382 expressions = [primary] 5383 while self._match(TokenType.STRING): 5384 expressions.append(exp.Literal.string(self._prev.text)) 5385 5386 if len(expressions) > 1: 5387 return self.expression(exp.Concat, expressions=expressions) 5388 5389 return primary 5390 5391 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5392 return exp.Literal.number(f"0.{self._prev.text}") 
5393 5394 if self._match(TokenType.L_PAREN): 5395 comments = self._prev_comments 5396 query = self._parse_select() 5397 5398 if query: 5399 expressions = [query] 5400 else: 5401 expressions = self._parse_expressions() 5402 5403 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5404 5405 if not this and self._match(TokenType.R_PAREN, advance=False): 5406 this = self.expression(exp.Tuple) 5407 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5408 this = self._parse_subquery(this=this, parse_alias=False) 5409 elif isinstance(this, exp.Subquery): 5410 this = self._parse_subquery( 5411 this=self._parse_set_operations(this), parse_alias=False 5412 ) 5413 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5414 this = self.expression(exp.Tuple, expressions=expressions) 5415 else: 5416 this = self.expression(exp.Paren, this=this) 5417 5418 if this: 5419 this.add_comments(comments) 5420 5421 self._match_r_paren(expression=this) 5422 return this 5423 5424 return None 5425 5426 def _parse_field( 5427 self, 5428 any_token: bool = False, 5429 tokens: t.Optional[t.Collection[TokenType]] = None, 5430 anonymous_func: bool = False, 5431 ) -> t.Optional[exp.Expression]: 5432 if anonymous_func: 5433 field = ( 5434 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5435 or self._parse_primary() 5436 ) 5437 else: 5438 field = self._parse_primary() or self._parse_function( 5439 anonymous=anonymous_func, any_token=any_token 5440 ) 5441 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5442 5443 def _parse_function( 5444 self, 5445 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5446 anonymous: bool = False, 5447 optional_parens: bool = True, 5448 any_token: bool = False, 5449 ) -> t.Optional[exp.Expression]: 5450 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5451 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5452 fn_syntax = False 5453 if ( 5454 self._match(TokenType.L_BRACE, advance=False) 5455 and self._next 5456 and self._next.text.upper() == "FN" 5457 ): 5458 self._advance(2) 5459 fn_syntax = True 5460 5461 func = self._parse_function_call( 5462 functions=functions, 5463 anonymous=anonymous, 5464 optional_parens=optional_parens, 5465 any_token=any_token, 5466 ) 5467 5468 if fn_syntax: 5469 self._match(TokenType.R_BRACE) 5470 5471 return func 5472 5473 def _parse_function_call( 5474 self, 5475 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5476 anonymous: bool = False, 5477 optional_parens: bool = True, 5478 any_token: bool = False, 5479 ) -> t.Optional[exp.Expression]: 5480 if not self._curr: 5481 return None 5482 5483 comments = self._curr.comments 5484 token_type = self._curr.token_type 5485 this = self._curr.text 5486 upper = this.upper() 5487 5488 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5489 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5490 self._advance() 5491 return self._parse_window(parser(self)) 5492 5493 if not self._next or self._next.token_type != TokenType.L_PAREN: 5494 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5495 self._advance() 5496 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5497 5498 return None 5499 5500 if any_token: 5501 if token_type in self.RESERVED_TOKENS: 5502 return None 5503 elif token_type not in self.FUNC_TOKENS: 5504 return None 5505 5506 self._advance(2) 5507 5508 parser = self.FUNCTION_PARSERS.get(upper) 5509 if parser and not anonymous: 5510 this = 
parser(self) 5511 else: 5512 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5513 5514 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5515 this = self.expression( 5516 subquery_predicate, comments=comments, this=self._parse_select() 5517 ) 5518 self._match_r_paren() 5519 return this 5520 5521 if functions is None: 5522 functions = self.FUNCTIONS 5523 5524 function = functions.get(upper) 5525 known_function = function and not anonymous 5526 5527 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5528 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5529 5530 post_func_comments = self._curr and self._curr.comments 5531 if known_function and post_func_comments: 5532 # If the user-provided comment "/* sqlglot.anonymous */" follows the function 5533 # call, we'll construct it as exp.Anonymous, even if it's "known" 5534 if any( 5535 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5536 for comment in post_func_comments 5537 ): 5538 known_function = False 5539 5540 if alias and known_function: 5541 args = self._kv_to_prop_eq(args) 5542 5543 if known_function: 5544 func_builder = t.cast(t.Callable, function) 5545 5546 if "dialect" in func_builder.__code__.co_varnames: 5547 func = func_builder(args, dialect=self.dialect) 5548 else: 5549 func = func_builder(args) 5550 5551 func = self.validate_expression(func, args) 5552 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5553 func.meta["name"] = this 5554 5555 this = func 5556 else: 5557 if token_type == TokenType.IDENTIFIER: 5558 this = exp.Identifier(this=this, quoted=True) 5559 this = self.expression(exp.Anonymous, this=this, expressions=args) 5560 5561 if isinstance(this, exp.Expression): 5562 this.add_comments(comments) 5563 5564 self._match_r_paren(this) 5565 return self._parse_window(this) 5566 5567 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5568 return expression 5569 5570 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5571 transformed = [] 5572 5573 for index, e in enumerate(expressions): 5574 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5575 if isinstance(e, exp.Alias): 5576 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5577 5578 if not isinstance(e, exp.PropertyEQ): 5579 e = self.expression( 5580 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5581 ) 5582 5583 if isinstance(e.this, exp.Column): 5584 e.this.replace(e.this.this) 5585 else: 5586 e = self._to_prop_eq(e, index) 5587 5588 transformed.append(e) 5589 5590 return transformed 5591 5592 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5593 return self._parse_statement() 5594 5595 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5596 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5597 5598 def _parse_user_defined_function( 5599 self, kind: t.Optional[TokenType] = None 5600 ) -> t.Optional[exp.Expression]: 5601 this = self._parse_id_var() 5602 5603 while self._match(TokenType.DOT): 5604 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5605 5606 if not self._match(TokenType.L_PAREN): 5607 return this 5608 5609 expressions = self._parse_csv(self._parse_function_parameter) 5610 self._match_r_paren() 5611 return self.expression( 5612 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5613 ) 5614 5615 def _parse_introducer(self,
token: Token) -> exp.Introducer | exp.Identifier: 5616 literal = self._parse_primary() 5617 if literal: 5618 return self.expression(exp.Introducer, this=token.text, expression=literal) 5619 5620 return self.expression(exp.Identifier, this=token.text) 5621 5622 def _parse_session_parameter(self) -> exp.SessionParameter: 5623 kind = None 5624 this = self._parse_id_var() or self._parse_primary() 5625 5626 if this and self._match(TokenType.DOT): 5627 kind = this.name 5628 this = self._parse_var() or self._parse_primary() 5629 5630 return self.expression(exp.SessionParameter, this=this, kind=kind) 5631 5632 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5633 return self._parse_id_var() 5634 5635 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5636 index = self._index 5637 5638 if self._match(TokenType.L_PAREN): 5639 expressions = t.cast( 5640 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5641 ) 5642 5643 if not self._match(TokenType.R_PAREN): 5644 self._retreat(index) 5645 else: 5646 expressions = [self._parse_lambda_arg()] 5647 5648 if self._match_set(self.LAMBDAS): 5649 return self.LAMBDAS[self._prev.token_type](self, expressions) 5650 5651 self._retreat(index) 5652 5653 this: t.Optional[exp.Expression] 5654 5655 if self._match(TokenType.DISTINCT): 5656 this = self.expression( 5657 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5658 ) 5659 else: 5660 this = self._parse_select_or_expression(alias=alias) 5661 5662 return self._parse_limit( 5663 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5664 ) 5665 5666 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5667 index = self._index 5668 if not self._match(TokenType.L_PAREN): 5669 return this 5670 5671 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5672 # expr can be of both types 5673 if self._match_set(self.SELECT_START_TOKENS): 5674 self._retreat(index) 5675 return this 5676 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5677 self._match_r_paren() 5678 return self.expression(exp.Schema, this=this, expressions=args) 5679 5680 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5681 return self._parse_column_def(self._parse_field(any_token=True)) 5682 5683 def _parse_column_def( 5684 self, this: t.Optional[exp.Expression], computed_column: bool = True 5685 ) -> t.Optional[exp.Expression]: 5686 # column defs are not really columns, they're identifiers 5687 if isinstance(this, exp.Column): 5688 this = this.this 5689 5690 if not computed_column: 5691 self._match(TokenType.ALIAS) 5692 5693 kind = self._parse_types(schema=True) 5694 5695 if self._match_text_seq("FOR", "ORDINALITY"): 5696 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5697 5698 constraints: t.List[exp.Expression] = [] 5699 5700 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5701 ("ALIAS", "MATERIALIZED") 5702 ): 5703 persisted = self._prev.text.upper() == "MATERIALIZED" 5704 constraint_kind = exp.ComputedColumnConstraint( 5705 this=self._parse_assignment(), 5706 persisted=persisted or self._match_text_seq("PERSISTED"), 5707 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5708 ) 5709 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5710 elif ( 5711 kind 5712 and self._match(TokenType.ALIAS, advance=False) 5713 and ( 5714 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5715 or (self._next and self._next.token_type == TokenType.L_PAREN) 5716 ) 5717 ): 5718 self._advance() 5719 constraints.append( 5720 self.expression( 5721 exp.ColumnConstraint, 5722 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5723 ) 5724 ) 5725 5726 while True: 5727 constraint = self._parse_column_constraint() 5728 if not constraint: 5729 break 5730 constraints.append(constraint) 5731 5732 if not kind and not constraints: 5733 return this 5734 5735 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5736 5737 def _parse_auto_increment( 5738 self, 5739 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5740 start = None 5741 increment = None 5742 5743 if self._match(TokenType.L_PAREN, advance=False): 5744 args = self._parse_wrapped_csv(self._parse_bitwise) 5745 start = seq_get(args, 0) 5746 increment = seq_get(args, 1) 5747 elif self._match_text_seq("START"): 5748 start = self._parse_bitwise() 5749 self._match_text_seq("INCREMENT") 5750 increment = self._parse_bitwise() 5751 5752 if start and increment: 5753 return exp.GeneratedAsIdentityColumnConstraint( 5754 start=start, increment=increment, this=False 5755 ) 5756 5757 return exp.AutoIncrementColumnConstraint() 5758 5759 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5760 if not self._match_text_seq("REFRESH"): 5761 self._retreat(self._index - 1) 5762 return None 5763 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5764 5765 def _parse_compress(self) -> exp.CompressColumnConstraint: 5766 if self._match(TokenType.L_PAREN, advance=False): 5767 return self.expression( 5768 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5769 ) 5770 5771 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5772 5773 def 
_parse_generated_as_identity( 5774 self, 5775 ) -> ( 5776 exp.GeneratedAsIdentityColumnConstraint 5777 | exp.ComputedColumnConstraint 5778 | exp.GeneratedAsRowColumnConstraint 5779 ): 5780 if self._match_text_seq("BY", "DEFAULT"): 5781 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5782 this = self.expression( 5783 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5784 ) 5785 else: 5786 self._match_text_seq("ALWAYS") 5787 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5788 5789 self._match(TokenType.ALIAS) 5790 5791 if self._match_text_seq("ROW"): 5792 start = self._match_text_seq("START") 5793 if not start: 5794 self._match(TokenType.END) 5795 hidden = self._match_text_seq("HIDDEN") 5796 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5797 5798 identity = self._match_text_seq("IDENTITY") 5799 5800 if self._match(TokenType.L_PAREN): 5801 if self._match(TokenType.START_WITH): 5802 this.set("start", self._parse_bitwise()) 5803 if self._match_text_seq("INCREMENT", "BY"): 5804 this.set("increment", self._parse_bitwise()) 5805 if self._match_text_seq("MINVALUE"): 5806 this.set("minvalue", self._parse_bitwise()) 5807 if self._match_text_seq("MAXVALUE"): 5808 this.set("maxvalue", self._parse_bitwise()) 5809 5810 if self._match_text_seq("CYCLE"): 5811 this.set("cycle", True) 5812 elif self._match_text_seq("NO", "CYCLE"): 5813 this.set("cycle", False) 5814 5815 if not identity: 5816 this.set("expression", self._parse_range()) 5817 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5818 args = self._parse_csv(self._parse_bitwise) 5819 this.set("start", seq_get(args, 0)) 5820 this.set("increment", seq_get(args, 1)) 5821 5822 self._match_r_paren() 5823 5824 return this 5825 5826 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5827 self._match_text_seq("LENGTH") 5828 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5829 5830 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5831 if self._match_text_seq("NULL"): 5832 return self.expression(exp.NotNullColumnConstraint) 5833 if self._match_text_seq("CASESPECIFIC"): 5834 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5835 if self._match_text_seq("FOR", "REPLICATION"): 5836 return self.expression(exp.NotForReplicationColumnConstraint) 5837 5838 # Unconsume the `NOT` token 5839 self._retreat(self._index - 1) 5840 return None 5841 5842 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5843 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5844 5845 procedure_option_follows = ( 5846 self._match(TokenType.WITH, advance=False) 5847 and self._next 5848 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5849 ) 5850 5851 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5852 return self.expression( 5853 exp.ColumnConstraint, 5854 this=this, 5855 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5856 ) 5857 5858 return this 5859 5860 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5861 if not self._match(TokenType.CONSTRAINT): 5862 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5863 5864 return self.expression( 5865 exp.Constraint, 5866 this=self._parse_id_var(), 5867 expressions=self._parse_unnamed_constraints(), 5868 ) 5869 5870 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5871 constraints = [] 5872 while True: 5873 
constraint = self._parse_unnamed_constraint() or self._parse_function() 5874 if not constraint: 5875 break 5876 constraints.append(constraint) 5877 5878 return constraints 5879 5880 def _parse_unnamed_constraint( 5881 self, constraints: t.Optional[t.Collection[str]] = None 5882 ) -> t.Optional[exp.Expression]: 5883 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5884 constraints or self.CONSTRAINT_PARSERS 5885 ): 5886 return None 5887 5888 constraint = self._prev.text.upper() 5889 if constraint not in self.CONSTRAINT_PARSERS: 5890 self.raise_error(f"No parser found for schema constraint {constraint}.") 5891 5892 return self.CONSTRAINT_PARSERS[constraint](self) 5893 5894 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5895 return self._parse_id_var(any_token=False) 5896 5897 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5898 self._match_text_seq("KEY") 5899 return self.expression( 5900 exp.UniqueColumnConstraint, 5901 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5902 this=self._parse_schema(self._parse_unique_key()), 5903 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5904 on_conflict=self._parse_on_conflict(), 5905 ) 5906 5907 def _parse_key_constraint_options(self) -> t.List[str]: 5908 options = [] 5909 while True: 5910 if not self._curr: 5911 break 5912 5913 if self._match(TokenType.ON): 5914 action = None 5915 on = self._advance_any() and self._prev.text 5916 5917 if self._match_text_seq("NO", "ACTION"): 5918 action = "NO ACTION" 5919 elif self._match_text_seq("CASCADE"): 5920 action = "CASCADE" 5921 elif self._match_text_seq("RESTRICT"): 5922 action = "RESTRICT" 5923 elif self._match_pair(TokenType.SET, TokenType.NULL): 5924 action = "SET NULL" 5925 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5926 action = "SET DEFAULT" 5927 else: 5928 self.raise_error("Invalid key constraint") 5929 5930 options.append(f"ON {on} {action}") 5931 else: 5932 var = self._parse_var_from_options( 5933 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5934 ) 5935 if not var: 5936 break 5937 options.append(var.name) 5938 5939 return options 5940 5941 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5942 if match and not self._match(TokenType.REFERENCES): 5943 return None 5944 5945 expressions = None 5946 this = self._parse_table(schema=True) 5947 options = self._parse_key_constraint_options() 5948 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5949 5950 def _parse_foreign_key(self) -> exp.ForeignKey: 5951 expressions = self._parse_wrapped_id_vars() 5952 reference = self._parse_references() 5953 options = {} 5954 5955 while self._match(TokenType.ON): 5956 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5957 self.raise_error("Expected DELETE or UPDATE") 5958 5959 kind = self._prev.text.lower() 5960 5961 if self._match_text_seq("NO", "ACTION"): 5962 action = "NO ACTION" 5963 elif self._match(TokenType.SET): 5964 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5965 action = "SET " + self._prev.text.upper() 5966 else: 5967 self._advance() 5968 action = self._prev.text.upper() 5969 5970 options[kind] = action 5971 5972 return self.expression( 5973 exp.ForeignKey, 5974 expressions=expressions, 5975 reference=reference, 5976 **options, # type: ignore 5977 ) 5978 5979 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5980 return self._parse_ordered() or self._parse_field() 5981 5982 def 
_parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5983 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5984 self._retreat(self._index - 1) 5985 return None 5986 5987 id_vars = self._parse_wrapped_id_vars() 5988 return self.expression( 5989 exp.PeriodForSystemTimeConstraint, 5990 this=seq_get(id_vars, 0), 5991 expression=seq_get(id_vars, 1), 5992 ) 5993 5994 def _parse_primary_key( 5995 self, wrapped_optional: bool = False, in_props: bool = False 5996 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5997 desc = ( 5998 self._match_set((TokenType.ASC, TokenType.DESC)) 5999 and self._prev.token_type == TokenType.DESC 6000 ) 6001 6002 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6003 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 6004 6005 expressions = self._parse_wrapped_csv( 6006 self._parse_primary_key_part, optional=wrapped_optional 6007 ) 6008 options = self._parse_key_constraint_options() 6009 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6010 6011 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6012 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6013 6014 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6015 """ 6016 Parses a datetime column in ODBC format. We parse the column into the corresponding 6017 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6018 same as we do for `DATE('yyyy-mm-dd')`. 6019 6020 Reference: 6021 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6022 """ 6023 self._match(TokenType.VAR) 6024 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6025 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6026 if not self._match(TokenType.R_BRACE): 6027 self.raise_error("Expected }") 6028 return expression 6029 6030 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6031 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6032 return this 6033 6034 bracket_kind = self._prev.token_type 6035 if ( 6036 bracket_kind == TokenType.L_BRACE 6037 and self._curr 6038 and self._curr.token_type == TokenType.VAR 6039 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6040 ): 6041 return self._parse_odbc_datetime_literal() 6042 6043 expressions = self._parse_csv( 6044 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6045 ) 6046 6047 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6048 self.raise_error("Expected ]") 6049 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6050 self.raise_error("Expected }") 6051 6052 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6053 if bracket_kind == TokenType.L_BRACE: 6054 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6055 elif not this: 6056 this = build_array_constructor( 6057 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6058 ) 6059 else: 6060 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6061 if constructor_type: 6062 return build_array_constructor( 6063 constructor_type, 6064 args=expressions, 6065 bracket_kind=bracket_kind, 6066 dialect=self.dialect, 6067 ) 6068 6069 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6070
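# Illustrative note (an assumption based on apply_index_offset's contract, not
# part of the original source): passing -INDEX_OFFSET shifts literal subscripts
# into sqlglot's canonical 0-based form, so e.g. x[1] in a 1-indexed dialect and
# x[0] in a 0-indexed dialect should yield the same exp.Bracket built below.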
this = self.expression(exp.Bracket, this=this, expressions=expressions) 6071 6072 self._add_comments(this) 6073 return self._parse_bracket(this) 6074 6075 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6076 if self._match(TokenType.COLON): 6077 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6078 return this 6079 6080 def _parse_case(self) -> t.Optional[exp.Expression]: 6081 ifs = [] 6082 default = None 6083 6084 comments = self._prev_comments 6085 expression = self._parse_assignment() 6086 6087 while self._match(TokenType.WHEN): 6088 this = self._parse_assignment() 6089 self._match(TokenType.THEN) 6090 then = self._parse_assignment() 6091 ifs.append(self.expression(exp.If, this=this, true=then)) 6092 6093 if self._match(TokenType.ELSE): 6094 default = self._parse_assignment() 6095 6096 if not self._match(TokenType.END): 6097 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6098 default = exp.column("interval") 6099 else: 6100 self.raise_error("Expected END after CASE", self._prev) 6101 6102 return self.expression( 6103 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6104 ) 6105 6106 def _parse_if(self) -> t.Optional[exp.Expression]: 6107 if self._match(TokenType.L_PAREN): 6108 args = self._parse_csv(self._parse_assignment) 6109 this = self.validate_expression(exp.If.from_arg_list(args), args) 6110 self._match_r_paren() 6111 else: 6112 index = self._index - 1 6113 6114 if self.NO_PAREN_IF_COMMANDS and index == 0: 6115 return self._parse_as_command(self._prev) 6116 6117 condition = self._parse_assignment() 6118 6119 if not condition: 6120 self._retreat(index) 6121 return None 6122 6123 self._match(TokenType.THEN) 6124 true = self._parse_assignment() 6125 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6126 self._match(TokenType.END) 6127 this = self.expression(exp.If, this=condition, true=true, false=false) 6128 6129 return this 6130 6131 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6132 if not self._match_text_seq("VALUE", "FOR"): 6133 self._retreat(self._index - 1) 6134 return None 6135 6136 return self.expression( 6137 exp.NextValueFor, 6138 this=self._parse_column(), 6139 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6140 ) 6141 6142 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6143 this = self._parse_function() or self._parse_var_or_string(upper=True) 6144 6145 if self._match(TokenType.FROM): 6146 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6147 6148 if not self._match(TokenType.COMMA): 6149 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6150 6151 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6152 6153 def _parse_gap_fill(self) -> exp.GapFill: 6154 self._match(TokenType.TABLE) 6155 this = self._parse_table() 6156 6157 self._match(TokenType.COMMA) 6158 args = [this, *self._parse_csv(self._parse_lambda)] 6159 6160 gap_fill = exp.GapFill.from_arg_list(args) 6161 return self.validate_expression(gap_fill, args) 6162 6163 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6164 this = self._parse_assignment() 6165 6166 if not self._match(TokenType.ALIAS): 6167 if self._match(TokenType.COMMA): 6168 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6169 6170 self.raise_error("Expected AS after CAST") 6171 6172 fmt = None 6173 to 
= self._parse_types() 6174 6175 default = self._match(TokenType.DEFAULT) 6176 if default: 6177 default = self._parse_bitwise() 6178 self._match_text_seq("ON", "CONVERSION", "ERROR") 6179 6180 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6181 fmt_string = self._parse_string() 6182 fmt = self._parse_at_time_zone(fmt_string) 6183 6184 if not to: 6185 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6186 if to.this in exp.DataType.TEMPORAL_TYPES: 6187 this = self.expression( 6188 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6189 this=this, 6190 format=exp.Literal.string( 6191 format_time( 6192 fmt_string.this if fmt_string else "", 6193 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6194 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6195 ) 6196 ), 6197 safe=safe, 6198 ) 6199 6200 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6201 this.set("zone", fmt.args["zone"]) 6202 return this 6203 elif not to: 6204 self.raise_error("Expected TYPE after CAST") 6205 elif isinstance(to, exp.Identifier): 6206 to = exp.DataType.build(to.name, udt=True) 6207 elif to.this == exp.DataType.Type.CHAR: 6208 if self._match(TokenType.CHARACTER_SET): 6209 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6210 6211 return self.expression( 6212 exp.Cast if strict else exp.TryCast, 6213 this=this, 6214 to=to, 6215 format=fmt, 6216 safe=safe, 6217 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6218 default=default, 6219 ) 6220 6221 def _parse_string_agg(self) -> exp.GroupConcat: 6222 if self._match(TokenType.DISTINCT): 6223 args: t.List[t.Optional[exp.Expression]] = [ 6224 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6225 ] 6226 if self._match(TokenType.COMMA): 6227 args.extend(self._parse_csv(self._parse_assignment)) 6228 else: 6229 args = self._parse_csv(self._parse_assignment) # type: ignore 6230 6231 if self._match_text_seq("ON", "OVERFLOW"): 6232 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6233 if self._match_text_seq("ERROR"): 6234 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6235 else: 6236 self._match_text_seq("TRUNCATE") 6237 on_overflow = self.expression( 6238 exp.OverflowTruncateBehavior, 6239 this=self._parse_string(), 6240 with_count=( 6241 self._match_text_seq("WITH", "COUNT") 6242 or not self._match_text_seq("WITHOUT", "COUNT") 6243 ), 6244 ) 6245 else: 6246 on_overflow = None 6247 6248 index = self._index 6249 if not self._match(TokenType.R_PAREN) and args: 6250 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6251 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6252 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6253 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6254 6255 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6256 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6257 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
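# Illustrative sketch of the forms this normalization unifies (derived from the
# comments above; exact output depends on the target dialect):
#   Postgres:     STRING_AGG(x, ',' ORDER BY y)
#   Trino/Oracle: LISTAGG(x, ',') WITHIN GROUP (ORDER BY y)
#   MySQL:        GROUP_CONCAT(x ORDER BY y SEPARATOR ',')
# all of which are represented by the same exp.GroupConcat node.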
6258 if not self._match_text_seq("WITHIN", "GROUP"): 6259 self._retreat(index) 6260 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6261 6262 # The corresponding match_r_paren will be called in parse_function (caller) 6263 self._match_l_paren() 6264 6265 return self.expression( 6266 exp.GroupConcat, 6267 this=self._parse_order(this=seq_get(args, 0)), 6268 separator=seq_get(args, 1), 6269 on_overflow=on_overflow, 6270 ) 6271 6272 def _parse_convert( 6273 self, strict: bool, safe: t.Optional[bool] = None 6274 ) -> t.Optional[exp.Expression]: 6275 this = self._parse_bitwise() 6276 6277 if self._match(TokenType.USING): 6278 to: t.Optional[exp.Expression] = self.expression( 6279 exp.CharacterSet, this=self._parse_var() 6280 ) 6281 elif self._match(TokenType.COMMA): 6282 to = self._parse_types() 6283 else: 6284 to = None 6285 6286 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6287 6288 def _parse_xml_table(self) -> exp.XMLTable: 6289 namespaces = None 6290 passing = None 6291 columns = None 6292 6293 if self._match_text_seq("XMLNAMESPACES", "("): 6294 namespaces = self._parse_xml_namespace() 6295 self._match_text_seq(")", ",") 6296 6297 this = self._parse_string() 6298 6299 if self._match_text_seq("PASSING"): 6300 # The BY VALUE keywords are optional and are provided for semantic clarity 6301 self._match_text_seq("BY", "VALUE") 6302 passing = self._parse_csv(self._parse_column) 6303 6304 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6305 6306 if self._match_text_seq("COLUMNS"): 6307 columns = self._parse_csv(self._parse_field_def) 6308 6309 return self.expression( 6310 exp.XMLTable, 6311 this=this, 6312 namespaces=namespaces, 6313 passing=passing, 6314 columns=columns, 6315 by_ref=by_ref, 6316 ) 6317 6318 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6319 namespaces = [] 6320 6321 while True: 6322 if self._match(TokenType.DEFAULT): 6323 uri = self._parse_string() 6324 else: 6325 uri = self._parse_alias(self._parse_string()) 6326 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6327 if not self._match(TokenType.COMMA): 6328 break 6329 6330 return namespaces 6331 6332 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6333 """ 6334 There are generally two variants of the DECODE function: 6335 6336 - DECODE(bin, charset) 6337 - DECODE(expression, search, result [, search, result] ... [, default]) 6338 6339 The second variant will always be parsed into a CASE expression. Note that NULL 6340 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6341 instead of relying on pattern matching. 
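For example (illustrative, following the rules above), `DECODE(x, 1, 'one', NULL, 'none', 'other')`
is parsed roughly as `CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END`.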
6342 """ 6343 args = self._parse_csv(self._parse_assignment) 6344 6345 if len(args) < 3: 6346 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6347 6348 expression, *expressions = args 6349 if not expression: 6350 return None 6351 6352 ifs = [] 6353 for search, result in zip(expressions[::2], expressions[1::2]): 6354 if not search or not result: 6355 return None 6356 6357 if isinstance(search, exp.Literal): 6358 ifs.append( 6359 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6360 ) 6361 elif isinstance(search, exp.Null): 6362 ifs.append( 6363 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6364 ) 6365 else: 6366 cond = exp.or_( 6367 exp.EQ(this=expression.copy(), expression=search), 6368 exp.and_( 6369 exp.Is(this=expression.copy(), expression=exp.Null()), 6370 exp.Is(this=search.copy(), expression=exp.Null()), 6371 copy=False, 6372 ), 6373 copy=False, 6374 ) 6375 ifs.append(exp.If(this=cond, true=result)) 6376 6377 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6378 6379 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6380 self._match_text_seq("KEY") 6381 key = self._parse_column() 6382 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6383 self._match_text_seq("VALUE") 6384 value = self._parse_bitwise() 6385 6386 if not key and not value: 6387 return None 6388 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6389 6390 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6391 if not this or not self._match_text_seq("FORMAT", "JSON"): 6392 return this 6393 6394 return self.expression(exp.FormatJson, this=this) 6395 6396 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6397 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6398 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6399 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6400 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6401 else: 6402 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6403 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6404 6405 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6406 6407 if not empty and not error and not null: 6408 return None 6409 6410 return self.expression( 6411 exp.OnCondition, 6412 empty=empty, 6413 error=error, 6414 null=null, 6415 ) 6416 6417 def _parse_on_handling( 6418 self, on: str, *values: str 6419 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6420 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6421 for value in values: 6422 if self._match_text_seq(value, "ON", on): 6423 return f"{value} ON {on}" 6424 6425 index = self._index 6426 if self._match(TokenType.DEFAULT): 6427 default_value = self._parse_bitwise() 6428 if self._match_text_seq("ON", on): 6429 return default_value 6430 6431 self._retreat(index) 6432 6433 return None 6434 6435 @t.overload 6436 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6437 6438 @t.overload 6439 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
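# A rough sketch of the shapes the implementation below accepts (assumed from
# its parsing steps, not an exhaustive grammar):
#   JSON_OBJECT('k' VALUE v ABSENT ON NULL WITH UNIQUE KEYS RETURNING VARCHAR)
#   JSON_OBJECTAGG(KEY k VALUE v)  -- the agg=True variant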
6440 6441 def _parse_json_object(self, agg=False): 6442 star = self._parse_star() 6443 expressions = ( 6444 [star] 6445 if star 6446 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6447 ) 6448 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6449 6450 unique_keys = None 6451 if self._match_text_seq("WITH", "UNIQUE"): 6452 unique_keys = True 6453 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6454 unique_keys = False 6455 6456 self._match_text_seq("KEYS") 6457 6458 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6459 self._parse_type() 6460 ) 6461 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6462 6463 return self.expression( 6464 exp.JSONObjectAgg if agg else exp.JSONObject, 6465 expressions=expressions, 6466 null_handling=null_handling, 6467 unique_keys=unique_keys, 6468 return_type=return_type, 6469 encoding=encoding, 6470 ) 6471 6472 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6473 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6474 if not self._match_text_seq("NESTED"): 6475 this = self._parse_id_var() 6476 kind = self._parse_types(allow_identifiers=False) 6477 nested = None 6478 else: 6479 this = None 6480 kind = None 6481 nested = True 6482 6483 path = self._match_text_seq("PATH") and self._parse_string() 6484 nested_schema = nested and self._parse_json_schema() 6485 6486 return self.expression( 6487 exp.JSONColumnDef, 6488 this=this, 6489 kind=kind, 6490 path=path, 6491 nested_schema=nested_schema, 6492 ) 6493 6494 def _parse_json_schema(self) -> exp.JSONSchema: 6495 self._match_text_seq("COLUMNS") 6496 return self.expression( 6497 exp.JSONSchema, 6498 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6499 ) 6500 6501 def _parse_json_table(self) -> exp.JSONTable: 6502 this = self._parse_format_json(self._parse_bitwise()) 6503 path = self._match(TokenType.COMMA) and self._parse_string() 6504 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6505 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6506 schema = self._parse_json_schema() 6507 6508 return exp.JSONTable( 6509 this=this, 6510 schema=schema, 6511 path=path, 6512 error_handling=error_handling, 6513 empty_handling=empty_handling, 6514 ) 6515 6516 def _parse_match_against(self) -> exp.MatchAgainst: 6517 expressions = self._parse_csv(self._parse_column) 6518 6519 self._match_text_seq(")", "AGAINST", "(") 6520 6521 this = self._parse_string() 6522 6523 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6524 modifier = "IN NATURAL LANGUAGE MODE" 6525 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6526 modifier = f"{modifier} WITH QUERY EXPANSION" 6527 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6528 modifier = "IN BOOLEAN MODE" 6529 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6530 modifier = "WITH QUERY EXPANSION" 6531 else: 6532 modifier = None 6533 6534 return self.expression( 6535 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6536 ) 6537 6538 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6539 def _parse_open_json(self) -> exp.OpenJSON: 6540 this = self._parse_bitwise() 6541 path = self._match(TokenType.COMMA) and self._parse_string() 6542 6543 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6544 this = self._parse_field(any_token=True) 6545 kind = self._parse_types() 6546 path = 
self._parse_string() 6547 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6548 6549 return self.expression( 6550 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6551 ) 6552 6553 expressions = None 6554 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6555 self._match_l_paren() 6556 expressions = self._parse_csv(_parse_open_json_column_def) 6557 6558 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6559 6560 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6561 args = self._parse_csv(self._parse_bitwise) 6562 6563 if self._match(TokenType.IN): 6564 return self.expression( 6565 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6566 ) 6567 6568 if haystack_first: 6569 haystack = seq_get(args, 0) 6570 needle = seq_get(args, 1) 6571 else: 6572 haystack = seq_get(args, 1) 6573 needle = seq_get(args, 0) 6574 6575 return self.expression( 6576 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6577 ) 6578 6579 def _parse_predict(self) -> exp.Predict: 6580 self._match_text_seq("MODEL") 6581 this = self._parse_table() 6582 6583 self._match(TokenType.COMMA) 6584 self._match_text_seq("TABLE") 6585 6586 return self.expression( 6587 exp.Predict, 6588 this=this, 6589 expression=self._parse_table(), 6590 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6591 ) 6592 6593 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6594 args = self._parse_csv(self._parse_table) 6595 return exp.JoinHint(this=func_name.upper(), expressions=args) 6596 6597 def _parse_substring(self) -> exp.Substring: 6598 # Postgres supports the form: substring(string [from int] [for int]) 6599 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6600 6601 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6602 6603 if self._match(TokenType.FROM): 6604 args.append(self._parse_bitwise()) 6605 if self._match(TokenType.FOR): 6606 if len(args) == 1: 6607 args.append(exp.Literal.number(1)) 6608 args.append(self._parse_bitwise()) 6609 6610 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6611 6612 def _parse_trim(self) -> exp.Trim: 6613 # https://www.w3resource.com/sql/character-functions/trim.php 6614 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6615 6616 position = None 6617 collation = None 6618 expression = None 6619 6620 if self._match_texts(self.TRIM_TYPES): 6621 position = self._prev.text.upper() 6622 6623 this = self._parse_bitwise() 6624 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6625 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6626 expression = self._parse_bitwise() 6627 6628 if invert_order: 6629 this, expression = expression, this 6630 6631 if self._match(TokenType.COLLATE): 6632 collation = self._parse_bitwise() 6633 6634 return self.expression( 6635 exp.Trim, this=this, position=position, expression=expression, collation=collation 6636 ) 6637 6638 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6639 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6640 6641 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6642 return self._parse_window(self._parse_id_var(), alias=True) 6643 6644 def _parse_respect_or_ignore_nulls( 6645 self, this: t.Optional[exp.Expression] 6646 ) -> t.Optional[exp.Expression]: 6647 if self._match_text_seq("IGNORE", "NULLS"): 
6648 return self.expression(exp.IgnoreNulls, this=this) 6649 if self._match_text_seq("RESPECT", "NULLS"): 6650 return self.expression(exp.RespectNulls, this=this) 6651 return this 6652 6653 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6654 if self._match(TokenType.HAVING): 6655 self._match_texts(("MAX", "MIN")) 6656 max = self._prev.text.upper() != "MIN" 6657 return self.expression( 6658 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6659 ) 6660 6661 return this 6662 6663 def _parse_window( 6664 self, this: t.Optional[exp.Expression], alias: bool = False 6665 ) -> t.Optional[exp.Expression]: 6666 func = this 6667 comments = func.comments if isinstance(func, exp.Expression) else None 6668 6669 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6670 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6671 if self._match_text_seq("WITHIN", "GROUP"): 6672 order = self._parse_wrapped(self._parse_order) 6673 this = self.expression(exp.WithinGroup, this=this, expression=order) 6674 6675 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6676 self._match(TokenType.WHERE) 6677 this = self.expression( 6678 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6679 ) 6680 self._match_r_paren() 6681 6682 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6683 # Some dialects choose to implement it and some do not. 6684 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6685 6686 # There is some code above in _parse_lambda that handles 6687 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6688 6689 # The code below handles 6690 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6691 6692 # Oracle allows both formats 6693 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6694 # and Snowflake chose to do the same for familiarity 6695 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6696 if isinstance(this, exp.AggFunc): 6697 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6698 6699 if ignore_respect and ignore_respect is not this: 6700 ignore_respect.replace(ignore_respect.this) 6701 this = self.expression(ignore_respect.__class__, this=this) 6702 6703 this = self._parse_respect_or_ignore_nulls(this) 6704 6705 # bigquery select from window x AS (partition by ...)
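# Illustrative example: for BigQuery's SELECT ... FROM t WINDOW w AS (PARTITION BY a),
# _parse_named_window re-enters here with alias=True, so the AS is consumed and
# no OVER keyword is expected while parsing the named-window definition itself.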
6706 if alias: 6707 over = None 6708 self._match(TokenType.ALIAS) 6709 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6710 return this 6711 else: 6712 over = self._prev.text.upper() 6713 6714 if comments and isinstance(func, exp.Expression): 6715 func.pop_comments() 6716 6717 if not self._match(TokenType.L_PAREN): 6718 return self.expression( 6719 exp.Window, 6720 comments=comments, 6721 this=this, 6722 alias=self._parse_id_var(False), 6723 over=over, 6724 ) 6725 6726 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6727 6728 first = self._match(TokenType.FIRST) 6729 if self._match_text_seq("LAST"): 6730 first = False 6731 6732 partition, order = self._parse_partition_and_order() 6733 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6734 6735 if kind: 6736 self._match(TokenType.BETWEEN) 6737 start = self._parse_window_spec() 6738 self._match(TokenType.AND) 6739 end = self._parse_window_spec() 6740 6741 spec = self.expression( 6742 exp.WindowSpec, 6743 kind=kind, 6744 start=start["value"], 6745 start_side=start["side"], 6746 end=end["value"], 6747 end_side=end["side"], 6748 ) 6749 else: 6750 spec = None 6751 6752 self._match_r_paren() 6753 6754 window = self.expression( 6755 exp.Window, 6756 comments=comments, 6757 this=this, 6758 partition_by=partition, 6759 order=order, 6760 spec=spec, 6761 alias=window_alias, 6762 over=over, 6763 first=first, 6764 ) 6765 6766 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6767 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6768 return self._parse_window(window, alias=alias) 6769 6770 return window 6771 6772 def _parse_partition_and_order( 6773 self, 6774 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6775 return self._parse_partition_by(), self._parse_order() 6776 6777 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6778 self._match(TokenType.BETWEEN) 6779 6780 return { 6781 "value": ( 6782 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6783 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6784 or self._parse_bitwise() 6785 ), 6786 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6787 } 6788 6789 def _parse_alias( 6790 self, this: t.Optional[exp.Expression], explicit: bool = False 6791 ) -> t.Optional[exp.Expression]: 6792 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6793 # so this section tries to parse the clause version and if it fails, it treats the token 6794 # as an identifier (alias) 6795 if self._can_parse_limit_or_offset(): 6796 return this 6797 6798 any_token = self._match(TokenType.ALIAS) 6799 comments = self._prev_comments or [] 6800 6801 if explicit and not any_token: 6802 return this 6803 6804 if self._match(TokenType.L_PAREN): 6805 aliases = self.expression( 6806 exp.Aliases, 6807 comments=comments, 6808 this=this, 6809 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6810 ) 6811 self._match_r_paren(aliases) 6812 return aliases 6813 6814 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6815 self.STRING_ALIASES and self._parse_string_as_identifier() 6816 ) 6817 6818 if alias: 6819 comments.extend(alias.pop_comments()) 6820 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6821 column = this.this 6822 6823 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6824 if not this.comments and column and 
column.comments: 6825 this.comments = column.pop_comments() 6826 6827 return this 6828 6829 def _parse_id_var( 6830 self, 6831 any_token: bool = True, 6832 tokens: t.Optional[t.Collection[TokenType]] = None, 6833 ) -> t.Optional[exp.Expression]: 6834 expression = self._parse_identifier() 6835 if not expression and ( 6836 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6837 ): 6838 quoted = self._prev.token_type == TokenType.STRING 6839 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6840 6841 return expression 6842 6843 def _parse_string(self) -> t.Optional[exp.Expression]: 6844 if self._match_set(self.STRING_PARSERS): 6845 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6846 return self._parse_placeholder() 6847 6848 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6849 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6850 6851 def _parse_number(self) -> t.Optional[exp.Expression]: 6852 if self._match_set(self.NUMERIC_PARSERS): 6853 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6854 return self._parse_placeholder() 6855 6856 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6857 if self._match(TokenType.IDENTIFIER): 6858 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6859 return self._parse_placeholder() 6860 6861 def _parse_var( 6862 self, 6863 any_token: bool = False, 6864 tokens: t.Optional[t.Collection[TokenType]] = None, 6865 upper: bool = False, 6866 ) -> t.Optional[exp.Expression]: 6867 if ( 6868 (any_token and self._advance_any()) 6869 or self._match(TokenType.VAR) 6870 or (self._match_set(tokens) if tokens else False) 6871 ): 6872 return self.expression( 6873 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6874 ) 6875 return self._parse_placeholder() 6876 6877 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6878 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6879 self._advance() 6880 return self._prev 6881 return None 6882 6883 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6884 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6885 6886 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6887 return self._parse_primary() or self._parse_var(any_token=True) 6888 6889 def _parse_null(self) -> t.Optional[exp.Expression]: 6890 if self._match_set(self.NULL_TOKENS): 6891 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6892 return self._parse_placeholder() 6893 6894 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6895 if self._match(TokenType.TRUE): 6896 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6897 if self._match(TokenType.FALSE): 6898 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6899 return self._parse_placeholder() 6900 6901 def _parse_star(self) -> t.Optional[exp.Expression]: 6902 if self._match(TokenType.STAR): 6903 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6904 return self._parse_placeholder() 6905 6906 def _parse_parameter(self) -> exp.Parameter: 6907 this = self._parse_identifier() or self._parse_primary_or_var() 6908 return self.expression(exp.Parameter, this=this) 6909 6910 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6911 if self._match_set(self.PLACEHOLDER_PARSERS): 6912 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6913 if placeholder: 6914 return placeholder 6915 self._advance(-1) 6916 return None 6917 6918 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6919 if not self._match_texts(keywords): 6920 return None 6921 if self._match(TokenType.L_PAREN, advance=False): 6922 return self._parse_wrapped_csv(self._parse_expression) 6923 6924 expression = self._parse_expression() 6925 return [expression] if expression else None 6926 6927 def _parse_csv( 6928 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6929 ) -> t.List[exp.Expression]: 6930 parse_result = parse_method() 6931 items = [parse_result] if parse_result is not None else [] 6932 6933 while self._match(sep): 6934 self._add_comments(parse_result) 6935 parse_result = parse_method() 6936 if parse_result is not None: 6937 items.append(parse_result) 6938 6939 return items 6940 6941 def _parse_tokens( 6942 self, parse_method: t.Callable, expressions: t.Dict 6943 ) -> t.Optional[exp.Expression]: 6944 this = parse_method() 6945 6946 while self._match_set(expressions): 6947 this = self.expression( 6948 expressions[self._prev.token_type], 6949 this=this, 6950 comments=self._prev_comments, 6951 expression=parse_method(), 6952 ) 6953 6954 return this 6955 6956 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6957 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6958 6959 def _parse_wrapped_csv( 6960 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6961 ) -> t.List[exp.Expression]: 6962 return self._parse_wrapped( 6963 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6964 ) 6965 6966 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6967 wrapped = self._match(TokenType.L_PAREN) 6968 if not wrapped and not optional: 6969 self.raise_error("Expecting (") 6970 parse_result = parse_method() 6971 if wrapped: 6972 self._match_r_paren() 6973 return parse_result 6974 6975 def _parse_expressions(self) -> t.List[exp.Expression]: 6976 return self._parse_csv(self._parse_expression) 6977 6978 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6979 return self._parse_select() or self._parse_set_operations( 6980 self._parse_alias(self._parse_assignment(), explicit=True) 6981 if alias 6982 else self._parse_assignment() 6983 ) 6984 6985 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6986 return self._parse_query_modifiers( 6987 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6988 ) 6989 6990 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6991 this = None 6992 if self._match_texts(self.TRANSACTION_KIND): 6993 this = self._prev.text 6994 6995 self._match_texts(("TRANSACTION", "WORK")) 6996 6997 modes = [] 6998 while True: 6999 mode = [] 7000 while self._match(TokenType.VAR): 7001 mode.append(self._prev.text) 7002 7003 if mode: 7004 modes.append(" ".join(mode)) 7005 if not self._match(TokenType.COMMA): 7006 break 7007 7008 return self.expression(exp.Transaction, this=this, modes=modes) 7009 7010 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7011 chain = None 7012 savepoint = None 7013 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7014 7015 self._match_texts(("TRANSACTION", "WORK")) 7016 7017 if self._match_text_seq("TO"): 7018 self._match_text_seq("SAVEPOINT") 7019 savepoint = self._parse_id_var() 
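        # AND [NO] CHAIN: chain becomes True for AND CHAIN, False for AND NO CHAIN,
        # and stays None when the clause is absent (it is only emitted for COMMIT).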
7020 7021 if self._match(TokenType.AND): 7022 chain = not self._match_text_seq("NO") 7023 self._match_text_seq("CHAIN") 7024 7025 if is_rollback: 7026 return self.expression(exp.Rollback, savepoint=savepoint) 7027 7028 return self.expression(exp.Commit, chain=chain) 7029 7030 def _parse_refresh(self) -> exp.Refresh: 7031 self._match(TokenType.TABLE) 7032 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7033 7034 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7035 if not self._match_text_seq("ADD"): 7036 return None 7037 7038 self._match(TokenType.COLUMN) 7039 exists_column = self._parse_exists(not_=True) 7040 expression = self._parse_field_def() 7041 7042 if expression: 7043 expression.set("exists", exists_column) 7044 7045 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7046 if self._match_texts(("FIRST", "AFTER")): 7047 position = self._prev.text 7048 column_position = self.expression( 7049 exp.ColumnPosition, this=self._parse_column(), position=position 7050 ) 7051 expression.set("position", column_position) 7052 7053 return expression 7054 7055 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7056 drop = self._match(TokenType.DROP) and self._parse_drop() 7057 if drop and not isinstance(drop, exp.Command): 7058 drop.set("kind", drop.args.get("kind", "COLUMN")) 7059 return drop 7060 7061 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7062 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7063 return self.expression( 7064 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7065 ) 7066 7067 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7068 index = self._index - 1 7069 7070 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7071 return self._parse_csv( 7072 lambda: self.expression( 7073 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7074 ) 7075 ) 7076 7077 self._retreat(index) 7078 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7079 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7080 7081 if self._match_text_seq("ADD", "COLUMNS"): 7082 schema = self._parse_schema() 7083 if schema: 7084 return [schema] 7085 return [] 7086 7087 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7088 7089 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7090 if self._match_texts(self.ALTER_ALTER_PARSERS): 7091 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7092 7093 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7094 # keyword after ALTER we default to parsing this statement 7095 self._match(TokenType.COLUMN) 7096 column = self._parse_field(any_token=True) 7097 7098 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7099 return self.expression(exp.AlterColumn, this=column, drop=True) 7100 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7101 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7102 if self._match(TokenType.COMMENT): 7103 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7104 if self._match_text_seq("DROP", "NOT", "NULL"): 7105 return self.expression( 7106 exp.AlterColumn, 7107 this=column, 7108 drop=True, 7109 allow_null=True, 7110 ) 7111 if self._match_text_seq("SET", "NOT", "NULL"): 7112 return self.expression( 7113 
exp.AlterColumn, 7114 this=column, 7115 allow_null=False, 7116 ) 7117 self._match_text_seq("SET", "DATA") 7118 self._match_text_seq("TYPE") 7119 return self.expression( 7120 exp.AlterColumn, 7121 this=column, 7122 dtype=self._parse_types(), 7123 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7124 using=self._match(TokenType.USING) and self._parse_assignment(), 7125 ) 7126 7127 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7128 if self._match_texts(("ALL", "EVEN", "AUTO")): 7129 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7130 7131 self._match_text_seq("KEY", "DISTKEY") 7132 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7133 7134 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7135 if compound: 7136 self._match_text_seq("SORTKEY") 7137 7138 if self._match(TokenType.L_PAREN, advance=False): 7139 return self.expression( 7140 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7141 ) 7142 7143 self._match_texts(("AUTO", "NONE")) 7144 return self.expression( 7145 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7146 ) 7147 7148 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7149 index = self._index - 1 7150 7151 partition_exists = self._parse_exists() 7152 if self._match(TokenType.PARTITION, advance=False): 7153 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7154 7155 self._retreat(index) 7156 return self._parse_csv(self._parse_drop_column) 7157 7158 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7159 if self._match(TokenType.COLUMN): 7160 exists = self._parse_exists() 7161 old_column = self._parse_column() 7162 to = self._match_text_seq("TO") 7163 new_column = self._parse_column() 7164 7165 if old_column is None or to is None or new_column is None: 7166 return None 7167 7168 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7169 7170 self._match_text_seq("TO") 7171 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7172 7173 def _parse_alter_table_set(self) -> exp.AlterSet: 7174 alter_set = self.expression(exp.AlterSet) 7175 7176 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7177 "TABLE", "PROPERTIES" 7178 ): 7179 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7180 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7181 alter_set.set("expressions", [self._parse_assignment()]) 7182 elif self._match_texts(("LOGGED", "UNLOGGED")): 7183 alter_set.set("option", exp.var(self._prev.text.upper())) 7184 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7185 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7186 elif self._match_text_seq("LOCATION"): 7187 alter_set.set("location", self._parse_field()) 7188 elif self._match_text_seq("ACCESS", "METHOD"): 7189 alter_set.set("access_method", self._parse_field()) 7190 elif self._match_text_seq("TABLESPACE"): 7191 alter_set.set("tablespace", self._parse_field()) 7192 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7193 alter_set.set("file_format", [self._parse_field()]) 7194 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7195 alter_set.set("file_format", self._parse_wrapped_options()) 7196 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7197 alter_set.set("copy_options", 
self._parse_wrapped_options()) 7198 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7199 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7200 else: 7201 if self._match_text_seq("SERDE"): 7202 alter_set.set("serde", self._parse_field()) 7203 7204 alter_set.set("expressions", [self._parse_properties()]) 7205 7206 return alter_set 7207 7208 def _parse_alter(self) -> exp.Alter | exp.Command: 7209 start = self._prev 7210 7211 alter_token = self._match_set(self.ALTERABLES) and self._prev 7212 if not alter_token: 7213 return self._parse_as_command(start) 7214 7215 exists = self._parse_exists() 7216 only = self._match_text_seq("ONLY") 7217 this = self._parse_table(schema=True) 7218 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7219 7220 if self._next: 7221 self._advance() 7222 7223 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7224 if parser: 7225 actions = ensure_list(parser(self)) 7226 not_valid = self._match_text_seq("NOT", "VALID") 7227 options = self._parse_csv(self._parse_property) 7228 7229 if not self._curr and actions: 7230 return self.expression( 7231 exp.Alter, 7232 this=this, 7233 kind=alter_token.text.upper(), 7234 exists=exists, 7235 actions=actions, 7236 only=only, 7237 options=options, 7238 cluster=cluster, 7239 not_valid=not_valid, 7240 ) 7241 7242 return self._parse_as_command(start) 7243 7244 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7245 start = self._prev 7246 # https://duckdb.org/docs/sql/statements/analyze 7247 if not self._curr: 7248 return self.expression(exp.Analyze) 7249 7250 options = [] 7251 while self._match_texts(self.ANALYZE_STYLES): 7252 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7253 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7254 else: 7255 options.append(self._prev.text.upper()) 7256 7257 this: t.Optional[exp.Expression] = None 7258 inner_expression: t.Optional[exp.Expression] = None 7259 7260 kind = self._curr and self._curr.text.upper() 7261 7262 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7263 this = self._parse_table_parts() 7264 elif self._match_text_seq("TABLES"): 7265 if self._match_set((TokenType.FROM, TokenType.IN)): 7266 kind = f"{kind} {self._prev.text.upper()}" 7267 this = self._parse_table(schema=True, is_db_reference=True) 7268 elif self._match_text_seq("DATABASE"): 7269 this = self._parse_table(schema=True, is_db_reference=True) 7270 elif self._match_text_seq("CLUSTER"): 7271 this = self._parse_table() 7272 # Try matching inner expr keywords before fallback to parse table. 
7273 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7274 kind = None 7275 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7276 else: 7277 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7278 kind = None 7279 this = self._parse_table_parts() 7280 7281 partition = self._try_parse(self._parse_partition) 7282 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7283 return self._parse_as_command(start) 7284 7285 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7286 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7287 "WITH", "ASYNC", "MODE" 7288 ): 7289 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7290 else: 7291 mode = None 7292 7293 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7294 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7295 7296 properties = self._parse_properties() 7297 return self.expression( 7298 exp.Analyze, 7299 kind=kind, 7300 this=this, 7301 mode=mode, 7302 partition=partition, 7303 properties=properties, 7304 expression=inner_expression, 7305 options=options, 7306 ) 7307 7308 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7309 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7310 this = None 7311 kind = self._prev.text.upper() 7312 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7313 expressions = [] 7314 7315 if not self._match_text_seq("STATISTICS"): 7316 self.raise_error("Expecting token STATISTICS") 7317 7318 if self._match_text_seq("NOSCAN"): 7319 this = "NOSCAN" 7320 elif self._match(TokenType.FOR): 7321 if self._match_text_seq("ALL", "COLUMNS"): 7322 this = "FOR ALL COLUMNS" 7323 if self._match_texts("COLUMNS"): 7324 this = "FOR COLUMNS" 7325 expressions = self._parse_csv(self._parse_column_reference) 7326 elif self._match_text_seq("SAMPLE"): 7327 sample = self._parse_number() 7328 expressions = [ 7329 self.expression( 7330 exp.AnalyzeSample, 7331 sample=sample, 7332 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7333 ) 7334 ] 7335 7336 return self.expression( 7337 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7338 ) 7339 7340 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7341 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7342 kind = None 7343 this = None 7344 expression: t.Optional[exp.Expression] = None 7345 if self._match_text_seq("REF", "UPDATE"): 7346 kind = "REF" 7347 this = "UPDATE" 7348 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7349 this = "UPDATE SET DANGLING TO NULL" 7350 elif self._match_text_seq("STRUCTURE"): 7351 kind = "STRUCTURE" 7352 if self._match_text_seq("CASCADE", "FAST"): 7353 this = "CASCADE FAST" 7354 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7355 ("ONLINE", "OFFLINE") 7356 ): 7357 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7358 expression = self._parse_into() 7359 7360 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7361 7362 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7363 this = self._prev.text.upper() 7364 if self._match_text_seq("COLUMNS"): 7365 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7366 return None 7367 7368 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7369 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7370 if self._match_text_seq("STATISTICS"): 7371 return self.expression(exp.AnalyzeDelete, kind=kind) 7372 return None 7373 7374 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7375 if self._match_text_seq("CHAINED", "ROWS"): 7376 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7377 return None 7378 7379 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7380 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7381 this = self._prev.text.upper() 7382 expression: t.Optional[exp.Expression] = None 7383 expressions = [] 7384 update_options = None 7385 7386 if self._match_text_seq("HISTOGRAM", "ON"): 7387 expressions = self._parse_csv(self._parse_column_reference) 7388 with_expressions = [] 7389 while self._match(TokenType.WITH): 7390 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7391 if self._match_texts(("SYNC", "ASYNC")): 7392 if self._match_text_seq("MODE", advance=False): 7393 with_expressions.append(f"{self._prev.text.upper()} MODE") 7394 self._advance() 7395 else: 7396 buckets = self._parse_number() 7397 if self._match_text_seq("BUCKETS"): 7398 with_expressions.append(f"{buckets} BUCKETS") 7399 if with_expressions: 7400 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7401 7402 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7403 TokenType.UPDATE, advance=False 7404 ): 7405 update_options = self._prev.text.upper() 7406 self._advance() 7407 elif self._match_text_seq("USING", "DATA"): 7408 expression = self.expression(exp.UsingData, this=self._parse_string()) 7409 7410 return self.expression( 7411 exp.AnalyzeHistogram, 7412 this=this, 7413 expressions=expressions, 7414 expression=expression, 7415 update_options=update_options, 7416 ) 7417 7418 def _parse_merge(self) -> exp.Merge: 7419 self._match(TokenType.INTO) 7420 target = self._parse_table() 7421 7422 if target and self._match(TokenType.ALIAS, advance=False): 7423 target.set("alias", self._parse_table_alias()) 7424 7425 self._match(TokenType.USING) 7426 using = self._parse_table() 7427 7428 self._match(TokenType.ON) 7429 on = self._parse_assignment() 7430 7431 return self.expression( 7432 exp.Merge, 7433 this=target, 7434 using=using, 7435 on=on, 7436 whens=self._parse_when_matched(), 7437 returning=self._parse_returning(), 7438 ) 7439 7440 def _parse_when_matched(self) -> exp.Whens: 7441 whens = [] 7442 7443 while self._match(TokenType.WHEN): 7444 matched = not self._match(TokenType.NOT) 7445 self._match_text_seq("MATCHED") 7446 source = ( 7447 False 7448 if self._match_text_seq("BY", "TARGET") 7449 else self._match_text_seq("BY", "SOURCE") 7450 ) 7451 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7452 7453 self._match(TokenType.THEN) 7454 7455 if self._match(TokenType.INSERT): 7456 this = self._parse_star() 7457 if this: 7458 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7459 else: 7460 then = self.expression( 7461 exp.Insert, 7462 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7463 expression=self._match_text_seq("VALUES") and self._parse_value(), 7464 ) 7465 elif self._match(TokenType.UPDATE): 7466 expressions = self._parse_star() 7467 if expressions: 7468 then = self.expression(exp.Update, expressions=expressions) 7469 else: 7470 then = self.expression( 7471 exp.Update, 7472 expressions=self._match(TokenType.SET) 7473 and 
self._parse_csv(self._parse_equality), 7474 ) 7475 elif self._match(TokenType.DELETE): 7476 then = self.expression(exp.Var, this=self._prev.text) 7477 else: 7478 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7479 7480 whens.append( 7481 self.expression( 7482 exp.When, 7483 matched=matched, 7484 source=source, 7485 condition=condition, 7486 then=then, 7487 ) 7488 ) 7489 return self.expression(exp.Whens, expressions=whens) 7490 7491 def _parse_show(self) -> t.Optional[exp.Expression]: 7492 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7493 if parser: 7494 return parser(self) 7495 return self._parse_as_command(self._prev) 7496 7497 def _parse_set_item_assignment( 7498 self, kind: t.Optional[str] = None 7499 ) -> t.Optional[exp.Expression]: 7500 index = self._index 7501 7502 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7503 return self._parse_set_transaction(global_=kind == "GLOBAL") 7504 7505 left = self._parse_primary() or self._parse_column() 7506 assignment_delimiter = self._match_texts(("=", "TO")) 7507 7508 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7509 self._retreat(index) 7510 return None 7511 7512 right = self._parse_statement() or self._parse_id_var() 7513 if isinstance(right, (exp.Column, exp.Identifier)): 7514 right = exp.var(right.name) 7515 7516 this = self.expression(exp.EQ, this=left, expression=right) 7517 return self.expression(exp.SetItem, this=this, kind=kind) 7518 7519 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7520 self._match_text_seq("TRANSACTION") 7521 characteristics = self._parse_csv( 7522 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7523 ) 7524 return self.expression( 7525 exp.SetItem, 7526 expressions=characteristics, 7527 kind="TRANSACTION", 7528 **{"global": global_}, # type: ignore 7529 ) 7530 7531 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7532 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7533 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7534 7535 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7536 index = self._index 7537 set_ = self.expression( 7538 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7539 ) 7540 7541 if self._curr: 7542 self._retreat(index) 7543 return self._parse_as_command(self._prev) 7544 7545 return set_ 7546 7547 def _parse_var_from_options( 7548 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7549 ) -> t.Optional[exp.Var]: 7550 start = self._curr 7551 if not start: 7552 return None 7553 7554 option = start.text.upper() 7555 continuations = options.get(option) 7556 7557 index = self._index 7558 self._advance() 7559 for keywords in continuations or []: 7560 if isinstance(keywords, str): 7561 keywords = (keywords,) 7562 7563 if self._match_text_seq(*keywords): 7564 option = f"{option} {' '.join(keywords)}" 7565 break 7566 else: 7567 if continuations or continuations is None: 7568 if raise_unmatched: 7569 self.raise_error(f"Unknown option {option}") 7570 7571 self._retreat(index) 7572 return None 7573 7574 return exp.var(option) 7575 7576 def _parse_as_command(self, start: Token) -> exp.Command: 7577 while self._curr: 7578 self._advance() 7579 text = self._find_sql(start, self._prev) 7580 size = len(start.text) 7581 self._warn_unsupported() 7582 return exp.Command(this=text[:size], expression=text[size:]) 7583 7584 def _parse_dict_property(self, 
this: str) -> exp.DictProperty: 7585 settings = [] 7586 7587 self._match_l_paren() 7588 kind = self._parse_id_var() 7589 7590 if self._match(TokenType.L_PAREN): 7591 while True: 7592 key = self._parse_id_var() 7593 value = self._parse_primary() 7594 if not key and value is None: 7595 break 7596 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7597 self._match(TokenType.R_PAREN) 7598 7599 self._match_r_paren() 7600 7601 return self.expression( 7602 exp.DictProperty, 7603 this=this, 7604 kind=kind.this if kind else None, 7605 settings=settings, 7606 ) 7607 7608 def _parse_dict_range(self, this: str) -> exp.DictRange: 7609 self._match_l_paren() 7610 has_min = self._match_text_seq("MIN") 7611 if has_min: 7612 min = self._parse_var() or self._parse_primary() 7613 self._match_text_seq("MAX") 7614 max = self._parse_var() or self._parse_primary() 7615 else: 7616 max = self._parse_var() or self._parse_primary() 7617 min = exp.Literal.number(0) 7618 self._match_r_paren() 7619 return self.expression(exp.DictRange, this=this, min=min, max=max) 7620 7621 def _parse_comprehension( 7622 self, this: t.Optional[exp.Expression] 7623 ) -> t.Optional[exp.Comprehension]: 7624 index = self._index 7625 expression = self._parse_column() 7626 if not self._match(TokenType.IN): 7627 self._retreat(index - 1) 7628 return None 7629 iterator = self._parse_column() 7630 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7631 return self.expression( 7632 exp.Comprehension, 7633 this=this, 7634 expression=expression, 7635 iterator=iterator, 7636 condition=condition, 7637 ) 7638 7639 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7640 if self._match(TokenType.HEREDOC_STRING): 7641 return self.expression(exp.Heredoc, this=self._prev.text) 7642 7643 if not self._match_text_seq("$"): 7644 return None 7645 7646 tags = ["$"] 7647 tag_text = None 7648 7649 if self._is_connected(): 7650 self._advance() 7651 tags.append(self._prev.text.upper()) 7652 else: 7653 self.raise_error("No closing $ found") 7654 7655 if tags[-1] != "$": 7656 if self._is_connected() and self._match_text_seq("$"): 7657 tag_text = tags[-1] 7658 tags.append("$") 7659 else: 7660 self.raise_error("No closing $ found") 7661 7662 heredoc_start = self._curr 7663 7664 while self._curr: 7665 if self._match_text_seq(*tags, advance=False): 7666 this = self._find_sql(heredoc_start, self._prev) 7667 self._advance(len(tags)) 7668 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7669 7670 self._advance() 7671 7672 self.raise_error(f"No closing {''.join(tags)} found") 7673 return None 7674 7675 def _find_parser( 7676 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7677 ) -> t.Optional[t.Callable]: 7678 if not self._curr: 7679 return None 7680 7681 index = self._index 7682 this = [] 7683 while True: 7684 # The current token might be multiple words 7685 curr = self._curr.text.upper() 7686 key = curr.split(" ") 7687 this.append(curr) 7688 7689 self._advance() 7690 result, trie = in_trie(trie, key) 7691 if result == TrieResult.FAILED: 7692 break 7693 7694 if result == TrieResult.EXISTS: 7695 subparser = parsers[" ".join(this)] 7696 return subparser 7697 7698 self._retreat(index) 7699 return None 7700 7701 def _match(self, token_type, advance=True, expression=None): 7702 if not self._curr: 7703 return None 7704 7705 if self._curr.token_type == token_type: 7706 if advance: 7707 self._advance() 7708 self._add_comments(expression) 7709 return True 7710 7711 return None 7712 7713 def _match_set(self, types, 
advance=True): 7714 if not self._curr: 7715 return None 7716 7717 if self._curr.token_type in types: 7718 if advance: 7719 self._advance() 7720 return True 7721 7722 return None 7723 7724 def _match_pair(self, token_type_a, token_type_b, advance=True): 7725 if not self._curr or not self._next: 7726 return None 7727 7728 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7729 if advance: 7730 self._advance(2) 7731 return True 7732 7733 return None 7734 7735 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7736 if not self._match(TokenType.L_PAREN, expression=expression): 7737 self.raise_error("Expecting (") 7738 7739 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7740 if not self._match(TokenType.R_PAREN, expression=expression): 7741 self.raise_error("Expecting )") 7742 7743 def _match_texts(self, texts, advance=True): 7744 if ( 7745 self._curr 7746 and self._curr.token_type != TokenType.STRING 7747 and self._curr.text.upper() in texts 7748 ): 7749 if advance: 7750 self._advance() 7751 return True 7752 return None 7753 7754 def _match_text_seq(self, *texts, advance=True): 7755 index = self._index 7756 for text in texts: 7757 if ( 7758 self._curr 7759 and self._curr.token_type != TokenType.STRING 7760 and self._curr.text.upper() == text 7761 ): 7762 self._advance() 7763 else: 7764 self._retreat(index) 7765 return None 7766 7767 if not advance: 7768 self._retreat(index) 7769 7770 return True 7771 7772 def _replace_lambda( 7773 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7774 ) -> t.Optional[exp.Expression]: 7775 if not node: 7776 return node 7777 7778 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7779 7780 for column in node.find_all(exp.Column): 7781 typ = lambda_types.get(column.parts[0].name) 7782 if typ is not None: 7783 dot_or_id = column.to_dot() if column.table else column.this 7784 7785 if typ: 7786 dot_or_id = self.expression( 7787 exp.Cast, 7788 this=dot_or_id, 7789 to=typ, 7790 ) 7791 7792 parent = column.parent 7793 7794 while isinstance(parent, exp.Dot): 7795 if not isinstance(parent.parent, exp.Dot): 7796 parent.replace(dot_or_id) 7797 break 7798 parent = parent.parent 7799 else: 7800 if column is node: 7801 node = dot_or_id 7802 else: 7803 column.replace(dot_or_id) 7804 return node 7805 7806 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7807 start = self._prev 7808 7809 # Not to be confused with TRUNCATE(number, decimals) function call 7810 if self._match(TokenType.L_PAREN): 7811 self._retreat(self._index - 2) 7812 return self._parse_function() 7813 7814 # Clickhouse supports TRUNCATE DATABASE as well 7815 is_database = self._match(TokenType.DATABASE) 7816 7817 self._match(TokenType.TABLE) 7818 7819 exists = self._parse_exists(not_=False) 7820 7821 expressions = self._parse_csv( 7822 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7823 ) 7824 7825 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7826 7827 if self._match_text_seq("RESTART", "IDENTITY"): 7828 identity = "RESTART" 7829 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7830 identity = "CONTINUE" 7831 else: 7832 identity = None 7833 7834 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7835 option = self._prev.text 7836 else: 7837 option = None 7838 7839 partition = self._parse_partition() 7840 7841 # Fallback case 7842 if self._curr: 7843 return 
self._parse_as_command(start) 7844 7845 return self.expression( 7846 exp.TruncateTable, 7847 expressions=expressions, 7848 is_database=is_database, 7849 exists=exists, 7850 cluster=cluster, 7851 identity=identity, 7852 option=option, 7853 partition=partition, 7854 ) 7855 7856 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7857 this = self._parse_ordered(self._parse_opclass) 7858 7859 if not self._match(TokenType.WITH): 7860 return this 7861 7862 op = self._parse_var(any_token=True) 7863 7864 return self.expression(exp.WithOperator, this=this, op=op) 7865 7866 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7867 self._match(TokenType.EQ) 7868 self._match(TokenType.L_PAREN) 7869 7870 opts: t.List[t.Optional[exp.Expression]] = [] 7871 while self._curr and not self._match(TokenType.R_PAREN): 7872 if self._match_text_seq("FORMAT_NAME", "="): 7873 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7874 # so we parse it separately to use _parse_field() 7875 prop = self.expression( 7876 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7877 ) 7878 opts.append(prop) 7879 else: 7880 opts.append(self._parse_property()) 7881 7882 self._match(TokenType.COMMA) 7883 7884 return opts 7885 7886 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7887 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7888 7889 options = [] 7890 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7891 option = self._parse_var(any_token=True) 7892 prev = self._prev.text.upper() 7893 7894 # Different dialects might separate options and values by white space, "=" and "AS" 7895 self._match(TokenType.EQ) 7896 self._match(TokenType.ALIAS) 7897 7898 param = self.expression(exp.CopyParameter, this=option) 7899 7900 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7901 TokenType.L_PAREN, advance=False 7902 ): 7903 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7904 param.set("expressions", self._parse_wrapped_options()) 7905 elif prev == "FILE_FORMAT": 7906 # T-SQL's external file format case 7907 param.set("expression", self._parse_field()) 7908 else: 7909 param.set("expression", self._parse_unquoted_field()) 7910 7911 options.append(param) 7912 self._match(sep) 7913 7914 return options 7915 7916 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7917 expr = self.expression(exp.Credentials) 7918 7919 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7920 expr.set("storage", self._parse_field()) 7921 if self._match_text_seq("CREDENTIALS"): 7922 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7923 creds = ( 7924 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7925 ) 7926 expr.set("credentials", creds) 7927 if self._match_text_seq("ENCRYPTION"): 7928 expr.set("encryption", self._parse_wrapped_options()) 7929 if self._match_text_seq("IAM_ROLE"): 7930 expr.set("iam_role", self._parse_field()) 7931 if self._match_text_seq("REGION"): 7932 expr.set("region", self._parse_field()) 7933 7934 return expr 7935 7936 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7937 return self._parse_field() 7938 7939 def _parse_copy(self) -> exp.Copy | exp.Command: 7940 start = self._prev 7941 7942 self._match(TokenType.INTO) 7943 7944 this = ( 7945 self._parse_select(nested=True, parse_subquery_alias=False) 7946 if self._match(TokenType.L_PAREN, advance=False) 7947 else self._parse_table(schema=True) 7948 ) 7949 7950 
kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7951 7952 files = self._parse_csv(self._parse_file_location) 7953 credentials = self._parse_credentials() 7954 7955 self._match_text_seq("WITH") 7956 7957 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7958 7959 # Fallback case 7960 if self._curr: 7961 return self._parse_as_command(start) 7962 7963 return self.expression( 7964 exp.Copy, 7965 this=this, 7966 kind=kind, 7967 credentials=credentials, 7968 files=files, 7969 params=params, 7970 ) 7971 7972 def _parse_normalize(self) -> exp.Normalize: 7973 return self.expression( 7974 exp.Normalize, 7975 this=self._parse_bitwise(), 7976 form=self._match(TokenType.COMMA) and self._parse_var(), 7977 ) 7978 7979 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 7980 args = self._parse_csv(lambda: self._parse_lambda()) 7981 7982 this = seq_get(args, 0) 7983 decimals = seq_get(args, 1) 7984 7985 return expr_type( 7986 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 7987 ) 7988 7989 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7990 if self._match_text_seq("COLUMNS", "(", advance=False): 7991 this = self._parse_function() 7992 if isinstance(this, exp.Columns): 7993 this.set("unpack", True) 7994 return this 7995 7996 return self.expression( 7997 exp.Star, 7998 **{ # type: ignore 7999 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8000 "replace": self._parse_star_op("REPLACE"), 8001 "rename": self._parse_star_op("RENAME"), 8002 }, 8003 ) 8004 8005 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8006 privilege_parts = [] 8007 8008 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8009 # (end of privilege list) or L_PAREN (start of column list) are met 8010 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8011 privilege_parts.append(self._curr.text.upper()) 8012 self._advance() 8013 8014 this = exp.var(" ".join(privilege_parts)) 8015 expressions = ( 8016 self._parse_wrapped_csv(self._parse_column) 8017 if self._match(TokenType.L_PAREN, advance=False) 8018 else None 8019 ) 8020 8021 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8022 8023 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8024 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8025 principal = self._parse_id_var() 8026 8027 if not principal: 8028 return None 8029 8030 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8031 8032 def _parse_grant(self) -> exp.Grant | exp.Command: 8033 start = self._prev 8034 8035 privileges = self._parse_csv(self._parse_grant_privilege) 8036 8037 self._match(TokenType.ON) 8038 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8039 8040 # Attempt to parse the securable e.g. 
MySQL allows names 8041 # such as "foo.*", "*.*" which are not easily parseable yet 8042 securable = self._try_parse(self._parse_table_parts) 8043 8044 if not securable or not self._match_text_seq("TO"): 8045 return self._parse_as_command(start) 8046 8047 principals = self._parse_csv(self._parse_grant_principal) 8048 8049 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8050 8051 if self._curr: 8052 return self._parse_as_command(start) 8053 8054 return self.expression( 8055 exp.Grant, 8056 privileges=privileges, 8057 kind=kind, 8058 securable=securable, 8059 principals=principals, 8060 grant_option=grant_option, 8061 ) 8062 8063 def _parse_overlay(self) -> exp.Overlay: 8064 return self.expression( 8065 exp.Overlay, 8066 **{ # type: ignore 8067 "this": self._parse_bitwise(), 8068 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8069 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8070 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8071 }, 8072 )
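
The Parser above is normally driven through the dialect layer rather than by hand. A minimal sketch of both entry points, assuming a recent sqlglot where Dialect.get_or_raise returns a dialect instance:

    import sqlglot
    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    # High-level: tokenize and parse in one call.
    ast = sqlglot.parse_one("MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN DELETE")
    assert isinstance(ast, exp.Merge)  # built by Parser._parse_merge above

    # Lower-level sketch of what a dialect does internally: tokenize the SQL,
    # then hand the raw tokens to a Parser instance.
    dialect = Dialect.get_or_raise("duckdb")
    tokens = dialect.tokenize("SELECT 1")
    expressions = dialect.parser().parse(tokens)

The module-level builder helpers shown below are the callbacks wired into the FUNCTIONS table of the Parser class.
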
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
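
For illustration, a sketch of the two shapes this builder can produce (the literal values are arbitrary):

    from sqlglot import exp
    from sqlglot.parser import build_var_map

    args = [
        exp.Literal.string("a"), exp.Literal.number(1),
        exp.Literal.string("b"), exp.Literal.number(2),
    ]
    node = build_var_map(args)
    # Alternating key/value arguments become parallel arrays:
    # VarMap(keys=Array('a', 'b'), values=Array(1, 2))

    node = build_var_map([exp.Star()])
    # A single star argument short-circuits to StarMap instead.
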
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range
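
These factory-made parsers are registered in RANGE_PARSERS below; a quick way to see one in action is LIKE with an ESCAPE clause (a sketch via the public parse_one):

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("SELECT 1 WHERE x LIKE 'a!%' ESCAPE '!'")
    node = ast.find(exp.Escape)
    # _parse_escape wraps the Like node when an ESCAPE clause follows:
    # Escape(this=Like(this=x, expression='a!%'), expression='!')
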
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
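
A sketch of the two behaviors this builder covers; the dialects named here are only illustrative:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect
    from sqlglot.parser import build_logarithm

    # Two arguments: operand order follows the dialect's LOG_BASE_FIRST setting.
    postgres = Dialect.get_or_raise("postgres")
    node = build_logarithm([exp.Literal.number(2), exp.column("x")], postgres)
    assert isinstance(node, exp.Log)

    # One argument: yields Ln when the dialect's parser sets LOG_DEFAULTS_TO_LN,
    # plain Log otherwise.
    mysql = Dialect.get_or_raise("mysql")
    node = build_logarithm([exp.column("x")], mysql)
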
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
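
Since JSON_EXTRACT is wired to this builder in FUNCTIONS below, a sketch of the effect via parse_one:

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a.b')")
    node = ast.find(exp.JSONExtract)
    # The raw path string has been normalized by dialect.to_json_path into a
    # structured JSONPath expression, so it can be re-rendered per target dialect.
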
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
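
The wrapping matters because Mod is rendered as the % operator; a small sketch:

    from sqlglot import exp
    from sqlglot.parser import build_mod

    node = build_mod([exp.column("a") + exp.Literal.number(1), exp.Literal.number(7)])
    print(node.sql())  # roughly: (a + 1) % 7
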
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
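
A sketch of the bracket bookkeeping; ClickHouse is used here only as an example dialect and the flag depends on its HAS_DISTINCT_ARRAY_CONSTRUCTORS setting:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect
    from sqlglot.parser import build_array_constructor
    from sqlglot.tokens import TokenType

    arr = build_array_constructor(
        exp.Array,
        [exp.Literal.number(1), exp.Literal.number(2)],
        TokenType.L_BRACKET,
        Dialect.get_or_raise("clickhouse"),
    )
    # bracket_notation is only recorded when the dialect sets
    # HAS_DISTINCT_ARRAY_CONSTRUCTORS; otherwise the arg is simply absent.
    print(arr.args.get("bracket_notation"))
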
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
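
A sketch of the two-argument fallback (the timezone names are arbitrary):

    from sqlglot import exp
    from sqlglot.parser import build_convert_timezone

    node = build_convert_timezone(
        [exp.Literal.string("America/New_York"), exp.column("ts")],
        default_source_tz="UTC",
    )
    # Two args: source_tz falls back to the supplied default, i.e.
    # ConvertTimezone(source_tz='UTC', target_tz='America/New_York', timestamp=ts).
    # With three args, the list goes to ConvertTimezone.from_arg_list unchanged.
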
175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.UDOUBLE, 323 TokenType.CHAR, 324 TokenType.NCHAR, 325 TokenType.VARCHAR, 326 TokenType.NVARCHAR, 327 TokenType.BPCHAR, 328 TokenType.TEXT, 329 TokenType.MEDIUMTEXT, 330 TokenType.LONGTEXT, 331 TokenType.MEDIUMBLOB, 332 TokenType.LONGBLOB, 333 TokenType.BINARY, 334 TokenType.VARBINARY, 335 TokenType.JSON, 336 TokenType.JSONB, 337 TokenType.INTERVAL, 338 TokenType.TINYBLOB, 339 TokenType.TINYTEXT, 340 TokenType.TIME, 341 TokenType.TIMETZ, 342 TokenType.TIMESTAMP, 343 TokenType.TIMESTAMP_S, 344 TokenType.TIMESTAMP_MS, 345 TokenType.TIMESTAMP_NS, 346 TokenType.TIMESTAMPTZ, 347 TokenType.TIMESTAMPLTZ, 348 TokenType.TIMESTAMPNTZ, 349 TokenType.DATETIME, 350 TokenType.DATETIME2, 351 TokenType.DATETIME64, 352 TokenType.SMALLDATETIME, 353 TokenType.DATE, 354 TokenType.DATE32, 355 TokenType.INT4RANGE, 356 TokenType.INT4MULTIRANGE, 357 TokenType.INT8RANGE, 358 TokenType.INT8MULTIRANGE, 359 TokenType.NUMRANGE, 360 TokenType.NUMMULTIRANGE, 361 TokenType.TSRANGE, 362 TokenType.TSMULTIRANGE, 363 TokenType.TSTZRANGE, 364 TokenType.TSTZMULTIRANGE, 365 TokenType.DATERANGE, 366 TokenType.DATEMULTIRANGE, 367 TokenType.DECIMAL, 368 TokenType.DECIMAL32, 369 TokenType.DECIMAL64, 370 TokenType.DECIMAL128, 371 TokenType.DECIMAL256, 372 TokenType.UDECIMAL, 373 TokenType.BIGDECIMAL, 374 TokenType.UUID, 375 TokenType.GEOGRAPHY, 376 TokenType.GEOMETRY, 377 TokenType.POINT, 378 TokenType.RING, 379 TokenType.LINESTRING, 380 TokenType.MULTILINESTRING, 381 TokenType.POLYGON, 382 TokenType.MULTIPOLYGON, 383 TokenType.HLLSKETCH, 384 TokenType.HSTORE, 385 TokenType.PSEUDO_TYPE, 386 TokenType.SUPER, 387 TokenType.SERIAL, 388 TokenType.SMALLSERIAL, 389 TokenType.BIGSERIAL, 390 TokenType.XML, 391 TokenType.YEAR, 392 TokenType.USERDEFINED, 393 TokenType.MONEY, 394 TokenType.SMALLMONEY, 395 TokenType.ROWVERSION, 396 TokenType.IMAGE, 397 TokenType.VARIANT, 398 TokenType.VECTOR, 399 TokenType.OBJECT, 400 
TokenType.OBJECT_IDENTIFIER, 401 TokenType.INET, 402 TokenType.IPADDRESS, 403 TokenType.IPPREFIX, 404 TokenType.IPV4, 405 TokenType.IPV6, 406 TokenType.UNKNOWN, 407 TokenType.NULL, 408 TokenType.NAME, 409 TokenType.TDIGEST, 410 TokenType.DYNAMIC, 411 *ENUM_TYPE_TOKENS, 412 *NESTED_TYPE_TOKENS, 413 *AGGREGATE_TYPE_TOKENS, 414 } 415 416 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 417 TokenType.BIGINT: TokenType.UBIGINT, 418 TokenType.INT: TokenType.UINT, 419 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 420 TokenType.SMALLINT: TokenType.USMALLINT, 421 TokenType.TINYINT: TokenType.UTINYINT, 422 TokenType.DECIMAL: TokenType.UDECIMAL, 423 TokenType.DOUBLE: TokenType.UDOUBLE, 424 } 425 426 SUBQUERY_PREDICATES = { 427 TokenType.ANY: exp.Any, 428 TokenType.ALL: exp.All, 429 TokenType.EXISTS: exp.Exists, 430 TokenType.SOME: exp.Any, 431 } 432 433 RESERVED_TOKENS = { 434 *Tokenizer.SINGLE_TOKENS.values(), 435 TokenType.SELECT, 436 } - {TokenType.IDENTIFIER} 437 438 DB_CREATABLES = { 439 TokenType.DATABASE, 440 TokenType.DICTIONARY, 441 TokenType.MODEL, 442 TokenType.NAMESPACE, 443 TokenType.SCHEMA, 444 TokenType.SEQUENCE, 445 TokenType.SINK, 446 TokenType.SOURCE, 447 TokenType.STORAGE_INTEGRATION, 448 TokenType.STREAMLIT, 449 TokenType.TABLE, 450 TokenType.TAG, 451 TokenType.VIEW, 452 TokenType.WAREHOUSE, 453 } 454 455 CREATABLES = { 456 TokenType.COLUMN, 457 TokenType.CONSTRAINT, 458 TokenType.FOREIGN_KEY, 459 TokenType.FUNCTION, 460 TokenType.INDEX, 461 TokenType.PROCEDURE, 462 *DB_CREATABLES, 463 } 464 465 ALTERABLES = { 466 TokenType.INDEX, 467 TokenType.TABLE, 468 TokenType.VIEW, 469 } 470 471 # Tokens that can represent identifiers 472 ID_VAR_TOKENS = { 473 TokenType.ALL, 474 TokenType.ATTACH, 475 TokenType.VAR, 476 TokenType.ANTI, 477 TokenType.APPLY, 478 TokenType.ASC, 479 TokenType.ASOF, 480 TokenType.AUTO_INCREMENT, 481 TokenType.BEGIN, 482 TokenType.BPCHAR, 483 TokenType.CACHE, 484 TokenType.CASE, 485 TokenType.COLLATE, 486 TokenType.COMMAND, 487 TokenType.COMMENT, 488 TokenType.COMMIT, 489 TokenType.CONSTRAINT, 490 TokenType.COPY, 491 TokenType.CUBE, 492 TokenType.CURRENT_SCHEMA, 493 TokenType.DEFAULT, 494 TokenType.DELETE, 495 TokenType.DESC, 496 TokenType.DESCRIBE, 497 TokenType.DETACH, 498 TokenType.DICTIONARY, 499 TokenType.DIV, 500 TokenType.END, 501 TokenType.EXECUTE, 502 TokenType.EXPORT, 503 TokenType.ESCAPE, 504 TokenType.FALSE, 505 TokenType.FIRST, 506 TokenType.FILTER, 507 TokenType.FINAL, 508 TokenType.FORMAT, 509 TokenType.FULL, 510 TokenType.IDENTIFIER, 511 TokenType.IS, 512 TokenType.ISNULL, 513 TokenType.INTERVAL, 514 TokenType.KEEP, 515 TokenType.KILL, 516 TokenType.LEFT, 517 TokenType.LIMIT, 518 TokenType.LOAD, 519 TokenType.MERGE, 520 TokenType.NATURAL, 521 TokenType.NEXT, 522 TokenType.OFFSET, 523 TokenType.OPERATOR, 524 TokenType.ORDINALITY, 525 TokenType.OVERLAPS, 526 TokenType.OVERWRITE, 527 TokenType.PARTITION, 528 TokenType.PERCENT, 529 TokenType.PIVOT, 530 TokenType.PRAGMA, 531 TokenType.RANGE, 532 TokenType.RECURSIVE, 533 TokenType.REFERENCES, 534 TokenType.REFRESH, 535 TokenType.RENAME, 536 TokenType.REPLACE, 537 TokenType.RIGHT, 538 TokenType.ROLLUP, 539 TokenType.ROW, 540 TokenType.ROWS, 541 TokenType.SEMI, 542 TokenType.SET, 543 TokenType.SETTINGS, 544 TokenType.SHOW, 545 TokenType.TEMPORARY, 546 TokenType.TOP, 547 TokenType.TRUE, 548 TokenType.TRUNCATE, 549 TokenType.UNIQUE, 550 TokenType.UNNEST, 551 TokenType.UNPIVOT, 552 TokenType.UPDATE, 553 TokenType.USE, 554 TokenType.VOLATILE, 555 TokenType.WINDOW, 556 *CREATABLES, 557 *SUBQUERY_PREDICATES, 558 *TYPE_TOKENS, 559 
*NO_PAREN_FUNCTIONS, 560 } 561 ID_VAR_TOKENS.remove(TokenType.UNION) 562 563 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 564 TokenType.ANTI, 565 TokenType.APPLY, 566 TokenType.ASOF, 567 TokenType.FULL, 568 TokenType.LEFT, 569 TokenType.LOCK, 570 TokenType.NATURAL, 571 TokenType.RIGHT, 572 TokenType.SEMI, 573 TokenType.WINDOW, 574 } 575 576 ALIAS_TOKENS = ID_VAR_TOKENS 577 578 ARRAY_CONSTRUCTORS = { 579 "ARRAY": exp.Array, 580 "LIST": exp.List, 581 } 582 583 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 584 585 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 586 587 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 588 589 FUNC_TOKENS = { 590 TokenType.COLLATE, 591 TokenType.COMMAND, 592 TokenType.CURRENT_DATE, 593 TokenType.CURRENT_DATETIME, 594 TokenType.CURRENT_SCHEMA, 595 TokenType.CURRENT_TIMESTAMP, 596 TokenType.CURRENT_TIME, 597 TokenType.CURRENT_USER, 598 TokenType.FILTER, 599 TokenType.FIRST, 600 TokenType.FORMAT, 601 TokenType.GLOB, 602 TokenType.IDENTIFIER, 603 TokenType.INDEX, 604 TokenType.ISNULL, 605 TokenType.ILIKE, 606 TokenType.INSERT, 607 TokenType.LIKE, 608 TokenType.MERGE, 609 TokenType.NEXT, 610 TokenType.OFFSET, 611 TokenType.PRIMARY_KEY, 612 TokenType.RANGE, 613 TokenType.REPLACE, 614 TokenType.RLIKE, 615 TokenType.ROW, 616 TokenType.UNNEST, 617 TokenType.VAR, 618 TokenType.LEFT, 619 TokenType.RIGHT, 620 TokenType.SEQUENCE, 621 TokenType.DATE, 622 TokenType.DATETIME, 623 TokenType.TABLE, 624 TokenType.TIMESTAMP, 625 TokenType.TIMESTAMPTZ, 626 TokenType.TRUNCATE, 627 TokenType.WINDOW, 628 TokenType.XOR, 629 *TYPE_TOKENS, 630 *SUBQUERY_PREDICATES, 631 } 632 633 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 634 TokenType.AND: exp.And, 635 } 636 637 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 638 TokenType.COLON_EQ: exp.PropertyEQ, 639 } 640 641 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 642 TokenType.OR: exp.Or, 643 } 644 645 EQUALITY = { 646 TokenType.EQ: exp.EQ, 647 TokenType.NEQ: exp.NEQ, 648 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 649 } 650 651 COMPARISON = { 652 TokenType.GT: exp.GT, 653 TokenType.GTE: exp.GTE, 654 TokenType.LT: exp.LT, 655 TokenType.LTE: exp.LTE, 656 } 657 658 BITWISE = { 659 TokenType.AMP: exp.BitwiseAnd, 660 TokenType.CARET: exp.BitwiseXor, 661 TokenType.PIPE: exp.BitwiseOr, 662 } 663 664 TERM = { 665 TokenType.DASH: exp.Sub, 666 TokenType.PLUS: exp.Add, 667 TokenType.MOD: exp.Mod, 668 TokenType.COLLATE: exp.Collate, 669 } 670 671 FACTOR = { 672 TokenType.DIV: exp.IntDiv, 673 TokenType.LR_ARROW: exp.Distance, 674 TokenType.SLASH: exp.Div, 675 TokenType.STAR: exp.Mul, 676 } 677 678 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 679 680 TIMES = { 681 TokenType.TIME, 682 TokenType.TIMETZ, 683 } 684 685 TIMESTAMPS = { 686 TokenType.TIMESTAMP, 687 TokenType.TIMESTAMPTZ, 688 TokenType.TIMESTAMPLTZ, 689 *TIMES, 690 } 691 692 SET_OPERATIONS = { 693 TokenType.UNION, 694 TokenType.INTERSECT, 695 TokenType.EXCEPT, 696 } 697 698 JOIN_METHODS = { 699 TokenType.ASOF, 700 TokenType.NATURAL, 701 TokenType.POSITIONAL, 702 } 703 704 JOIN_SIDES = { 705 TokenType.LEFT, 706 TokenType.RIGHT, 707 TokenType.FULL, 708 } 709 710 JOIN_KINDS = { 711 TokenType.ANTI, 712 TokenType.CROSS, 713 TokenType.INNER, 714 TokenType.OUTER, 715 TokenType.SEMI, 716 TokenType.STRAIGHT_JOIN, 717 } 718 719 JOIN_HINTS: t.Set[str] = set() 720 721 LAMBDAS = { 722 TokenType.ARROW: lambda self, expressions: self.expression( 723 exp.Lambda, 724 this=self._replace_lambda( 725 self._parse_assignment(), 726 expressions, 727 ), 
728 expressions=expressions, 729 ), 730 TokenType.FARROW: lambda self, expressions: self.expression( 731 exp.Kwarg, 732 this=exp.var(expressions[0].name), 733 expression=self._parse_assignment(), 734 ), 735 } 736 737 COLUMN_OPERATORS = { 738 TokenType.DOT: None, 739 TokenType.DOTCOLON: lambda self, this, to: self.expression( 740 exp.JSONCast, 741 this=this, 742 to=to, 743 ), 744 TokenType.DCOLON: lambda self, this, to: self.expression( 745 exp.Cast if self.STRICT_CAST else exp.TryCast, 746 this=this, 747 to=to, 748 ), 749 TokenType.ARROW: lambda self, this, path: self.expression( 750 exp.JSONExtract, 751 this=this, 752 expression=self.dialect.to_json_path(path), 753 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 754 ), 755 TokenType.DARROW: lambda self, this, path: self.expression( 756 exp.JSONExtractScalar, 757 this=this, 758 expression=self.dialect.to_json_path(path), 759 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 760 ), 761 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 762 exp.JSONBExtract, 763 this=this, 764 expression=path, 765 ), 766 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 767 exp.JSONBExtractScalar, 768 this=this, 769 expression=path, 770 ), 771 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 772 exp.JSONBContains, 773 this=this, 774 expression=key, 775 ), 776 } 777 778 EXPRESSION_PARSERS = { 779 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 780 exp.Column: lambda self: self._parse_column(), 781 exp.Condition: lambda self: self._parse_assignment(), 782 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 783 exp.Expression: lambda self: self._parse_expression(), 784 exp.From: lambda self: self._parse_from(joins=True), 785 exp.Group: lambda self: self._parse_group(), 786 exp.Having: lambda self: self._parse_having(), 787 exp.Hint: lambda self: self._parse_hint_body(), 788 exp.Identifier: lambda self: self._parse_id_var(), 789 exp.Join: lambda self: self._parse_join(), 790 exp.Lambda: lambda self: self._parse_lambda(), 791 exp.Lateral: lambda self: self._parse_lateral(), 792 exp.Limit: lambda self: self._parse_limit(), 793 exp.Offset: lambda self: self._parse_offset(), 794 exp.Order: lambda self: self._parse_order(), 795 exp.Ordered: lambda self: self._parse_ordered(), 796 exp.Properties: lambda self: self._parse_properties(), 797 exp.Qualify: lambda self: self._parse_qualify(), 798 exp.Returning: lambda self: self._parse_returning(), 799 exp.Select: lambda self: self._parse_select(), 800 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 801 exp.Table: lambda self: self._parse_table_parts(), 802 exp.TableAlias: lambda self: self._parse_table_alias(), 803 exp.Tuple: lambda self: self._parse_value(), 804 exp.Whens: lambda self: self._parse_when_matched(), 805 exp.Where: lambda self: self._parse_where(), 806 exp.Window: lambda self: self._parse_named_window(), 807 exp.With: lambda self: self._parse_with(), 808 "JOIN_TYPE": lambda self: self._parse_join_parts(), 809 } 810 811 STATEMENT_PARSERS = { 812 TokenType.ALTER: lambda self: self._parse_alter(), 813 TokenType.ANALYZE: lambda self: self._parse_analyze(), 814 TokenType.BEGIN: lambda self: self._parse_transaction(), 815 TokenType.CACHE: lambda self: self._parse_cache(), 816 TokenType.COMMENT: lambda self: self._parse_comment(), 817 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 818 TokenType.COPY: lambda self: self._parse_copy(), 819 TokenType.CREATE: lambda self: 
self._parse_create(), 820 TokenType.DELETE: lambda self: self._parse_delete(), 821 TokenType.DESC: lambda self: self._parse_describe(), 822 TokenType.DESCRIBE: lambda self: self._parse_describe(), 823 TokenType.DROP: lambda self: self._parse_drop(), 824 TokenType.GRANT: lambda self: self._parse_grant(), 825 TokenType.INSERT: lambda self: self._parse_insert(), 826 TokenType.KILL: lambda self: self._parse_kill(), 827 TokenType.LOAD: lambda self: self._parse_load(), 828 TokenType.MERGE: lambda self: self._parse_merge(), 829 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 830 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 831 TokenType.REFRESH: lambda self: self._parse_refresh(), 832 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 833 TokenType.SET: lambda self: self._parse_set(), 834 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 835 TokenType.UNCACHE: lambda self: self._parse_uncache(), 836 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 837 TokenType.UPDATE: lambda self: self._parse_update(), 838 TokenType.USE: lambda self: self._parse_use(), 839 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 840 } 841 842 UNARY_PARSERS = { 843 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 844 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 845 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 846 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 847 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 848 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 849 } 850 851 STRING_PARSERS = { 852 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 853 exp.RawString, this=token.text 854 ), 855 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 856 exp.National, this=token.text 857 ), 858 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 859 TokenType.STRING: lambda self, token: self.expression( 860 exp.Literal, this=token.text, is_string=True 861 ), 862 TokenType.UNICODE_STRING: lambda self, token: self.expression( 863 exp.UnicodeString, 864 this=token.text, 865 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 866 ), 867 } 868 869 NUMERIC_PARSERS = { 870 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 871 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 872 TokenType.HEX_STRING: lambda self, token: self.expression( 873 exp.HexString, 874 this=token.text, 875 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 876 ), 877 TokenType.NUMBER: lambda self, token: self.expression( 878 exp.Literal, this=token.text, is_string=False 879 ), 880 } 881 882 PRIMARY_PARSERS = { 883 **STRING_PARSERS, 884 **NUMERIC_PARSERS, 885 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 886 TokenType.NULL: lambda self, _: self.expression(exp.Null), 887 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 888 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 889 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 890 TokenType.STAR: lambda self, _: self._parse_star_ops(), 891 } 892 893 PLACEHOLDER_PARSERS = { 894 TokenType.PLACEHOLDER: 
lambda self: self.expression(exp.Placeholder), 895 TokenType.PARAMETER: lambda self: self._parse_parameter(), 896 TokenType.COLON: lambda self: ( 897 self.expression(exp.Placeholder, this=self._prev.text) 898 if self._match_set(self.ID_VAR_TOKENS) 899 else None 900 ), 901 } 902 903 RANGE_PARSERS = { 904 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 905 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 906 TokenType.GLOB: binary_range_parser(exp.Glob), 907 TokenType.ILIKE: binary_range_parser(exp.ILike), 908 TokenType.IN: lambda self, this: self._parse_in(this), 909 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 910 TokenType.IS: lambda self, this: self._parse_is(this), 911 TokenType.LIKE: binary_range_parser(exp.Like), 912 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 913 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 914 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 915 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 916 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 917 } 918 919 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 920 "ALLOWED_VALUES": lambda self: self.expression( 921 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 922 ), 923 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 924 "AUTO": lambda self: self._parse_auto_property(), 925 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 926 "BACKUP": lambda self: self.expression( 927 exp.BackupProperty, this=self._parse_var(any_token=True) 928 ), 929 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 930 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 931 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 932 "CHECKSUM": lambda self: self._parse_checksum(), 933 "CLUSTER BY": lambda self: self._parse_cluster(), 934 "CLUSTERED": lambda self: self._parse_clustered_by(), 935 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 936 exp.CollateProperty, **kwargs 937 ), 938 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 939 "CONTAINS": lambda self: self._parse_contains_property(), 940 "COPY": lambda self: self._parse_copy_property(), 941 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 942 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 943 "DEFINER": lambda self: self._parse_definer(), 944 "DETERMINISTIC": lambda self: self.expression( 945 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 946 ), 947 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 948 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 949 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 950 "DISTKEY": lambda self: self._parse_distkey(), 951 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 952 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 953 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 954 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 955 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 956 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 957 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 958 "FREESPACE": lambda self: self._parse_freespace(), 959 
"GLOBAL": lambda self: self.expression(exp.GlobalProperty), 960 "HEAP": lambda self: self.expression(exp.HeapProperty), 961 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 962 "IMMUTABLE": lambda self: self.expression( 963 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 964 ), 965 "INHERITS": lambda self: self.expression( 966 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 967 ), 968 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 969 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 970 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 971 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 972 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 973 "LIKE": lambda self: self._parse_create_like(), 974 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 975 "LOCK": lambda self: self._parse_locking(), 976 "LOCKING": lambda self: self._parse_locking(), 977 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 978 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 979 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 980 "MODIFIES": lambda self: self._parse_modifies_property(), 981 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 982 "NO": lambda self: self._parse_no_property(), 983 "ON": lambda self: self._parse_on_property(), 984 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 985 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 986 "PARTITION": lambda self: self._parse_partitioned_of(), 987 "PARTITION BY": lambda self: self._parse_partitioned_by(), 988 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 989 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 990 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 991 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 992 "READS": lambda self: self._parse_reads_property(), 993 "REMOTE": lambda self: self._parse_remote_with_connection(), 994 "RETURNS": lambda self: self._parse_returns(), 995 "STRICT": lambda self: self.expression(exp.StrictProperty), 996 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 997 "ROW": lambda self: self._parse_row(), 998 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 999 "SAMPLE": lambda self: self.expression( 1000 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1001 ), 1002 "SECURE": lambda self: self.expression(exp.SecureProperty), 1003 "SECURITY": lambda self: self._parse_security(), 1004 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1005 "SETTINGS": lambda self: self._parse_settings_property(), 1006 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1007 "SORTKEY": lambda self: self._parse_sortkey(), 1008 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1009 "STABLE": lambda self: self.expression( 1010 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1011 ), 1012 "STORED": lambda self: self._parse_stored(), 1013 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1014 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1015 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1016 "TEMPORARY": lambda self: 
self.expression(exp.TemporaryProperty), 1017 "TO": lambda self: self._parse_to_table(), 1018 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1019 "TRANSFORM": lambda self: self.expression( 1020 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1021 ), 1022 "TTL": lambda self: self._parse_ttl(), 1023 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1024 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1025 "VOLATILE": lambda self: self._parse_volatile_property(), 1026 "WITH": lambda self: self._parse_with_property(), 1027 } 1028 1029 CONSTRAINT_PARSERS = { 1030 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1031 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1032 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1033 "CHARACTER SET": lambda self: self.expression( 1034 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1035 ), 1036 "CHECK": lambda self: self.expression( 1037 exp.CheckColumnConstraint, 1038 this=self._parse_wrapped(self._parse_assignment), 1039 enforced=self._match_text_seq("ENFORCED"), 1040 ), 1041 "COLLATE": lambda self: self.expression( 1042 exp.CollateColumnConstraint, 1043 this=self._parse_identifier() or self._parse_column(), 1044 ), 1045 "COMMENT": lambda self: self.expression( 1046 exp.CommentColumnConstraint, this=self._parse_string() 1047 ), 1048 "COMPRESS": lambda self: self._parse_compress(), 1049 "CLUSTERED": lambda self: self.expression( 1050 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1051 ), 1052 "NONCLUSTERED": lambda self: self.expression( 1053 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1054 ), 1055 "DEFAULT": lambda self: self.expression( 1056 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1057 ), 1058 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1059 "EPHEMERAL": lambda self: self.expression( 1060 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1061 ), 1062 "EXCLUDE": lambda self: self.expression( 1063 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1064 ), 1065 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1066 "FORMAT": lambda self: self.expression( 1067 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1068 ), 1069 "GENERATED": lambda self: self._parse_generated_as_identity(), 1070 "IDENTITY": lambda self: self._parse_auto_increment(), 1071 "INLINE": lambda self: self._parse_inline(), 1072 "LIKE": lambda self: self._parse_create_like(), 1073 "NOT": lambda self: self._parse_not_constraint(), 1074 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1075 "ON": lambda self: ( 1076 self._match(TokenType.UPDATE) 1077 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1078 ) 1079 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1080 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1081 "PERIOD": lambda self: self._parse_period_for_system_time(), 1082 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1083 "REFERENCES": lambda self: self._parse_references(match=False), 1084 "TITLE": lambda self: self.expression( 1085 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1086 ), 1087 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1088 
"UNIQUE": lambda self: self._parse_unique(), 1089 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1090 "WATERMARK": lambda self: self.expression( 1091 exp.WatermarkColumnConstraint, 1092 this=self._match(TokenType.FOR) and self._parse_column(), 1093 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1094 ), 1095 "WITH": lambda self: self.expression( 1096 exp.Properties, expressions=self._parse_wrapped_properties() 1097 ), 1098 } 1099 1100 ALTER_PARSERS = { 1101 "ADD": lambda self: self._parse_alter_table_add(), 1102 "AS": lambda self: self._parse_select(), 1103 "ALTER": lambda self: self._parse_alter_table_alter(), 1104 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1105 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1106 "DROP": lambda self: self._parse_alter_table_drop(), 1107 "RENAME": lambda self: self._parse_alter_table_rename(), 1108 "SET": lambda self: self._parse_alter_table_set(), 1109 "SWAP": lambda self: self.expression( 1110 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1111 ), 1112 } 1113 1114 ALTER_ALTER_PARSERS = { 1115 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1116 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1117 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1118 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1119 } 1120 1121 SCHEMA_UNNAMED_CONSTRAINTS = { 1122 "CHECK", 1123 "EXCLUDE", 1124 "FOREIGN KEY", 1125 "LIKE", 1126 "PERIOD", 1127 "PRIMARY KEY", 1128 "UNIQUE", 1129 "WATERMARK", 1130 } 1131 1132 NO_PAREN_FUNCTION_PARSERS = { 1133 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1134 "CASE": lambda self: self._parse_case(), 1135 "CONNECT_BY_ROOT": lambda self: self.expression( 1136 exp.ConnectByRoot, this=self._parse_column() 1137 ), 1138 "IF": lambda self: self._parse_if(), 1139 } 1140 1141 INVALID_FUNC_NAME_TOKENS = { 1142 TokenType.IDENTIFIER, 1143 TokenType.STRING, 1144 } 1145 1146 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1147 1148 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1149 1150 FUNCTION_PARSERS = { 1151 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1152 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1153 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1154 "DECODE": lambda self: self._parse_decode(), 1155 "EXTRACT": lambda self: self._parse_extract(), 1156 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1157 "GAP_FILL": lambda self: self._parse_gap_fill(), 1158 "JSON_OBJECT": lambda self: self._parse_json_object(), 1159 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1160 "JSON_TABLE": lambda self: self._parse_json_table(), 1161 "MATCH": lambda self: self._parse_match_against(), 1162 "NORMALIZE": lambda self: self._parse_normalize(), 1163 "OPENJSON": lambda self: self._parse_open_json(), 1164 "OVERLAY": lambda self: self._parse_overlay(), 1165 "POSITION": lambda self: self._parse_position(), 1166 "PREDICT": lambda self: self._parse_predict(), 1167 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1168 "STRING_AGG": lambda self: self._parse_string_agg(), 1169 "SUBSTRING": lambda self: self._parse_substring(), 1170 "TRIM": lambda self: self._parse_trim(), 1171 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1172 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1173 "XMLELEMENT": lambda self: self.expression( 1174 
exp.XMLElement,
1175 this=self._match_text_seq("NAME") and self._parse_id_var(),
1176 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
1177 ),
1178 "XMLTABLE": lambda self: self._parse_xml_table(),
1179 }
1180
1181 QUERY_MODIFIER_PARSERS = {
1182 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
1183 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
1184 TokenType.WHERE: lambda self: ("where", self._parse_where()),
1185 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
1186 TokenType.HAVING: lambda self: ("having", self._parse_having()),
1187 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
1188 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
1189 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
1190 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
1191 TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
1192 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
1193 TokenType.FOR: lambda self: ("locks", self._parse_locks()),
1194 TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
1195 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
1196 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
1197 TokenType.CLUSTER_BY: lambda self: (
1198 "cluster",
1199 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
1200 ),
1201 TokenType.DISTRIBUTE_BY: lambda self: (
1202 "distribute",
1203 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
1204 ),
1205 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
1206 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
1207 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
1208 }
1209
1210 SET_PARSERS = {
1211 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
1212 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
1213 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
1214 "TRANSACTION": lambda self: self._parse_set_transaction(),
1215 }
1216
1217 SHOW_PARSERS: t.Dict[str, t.Callable] = {}
1218
1219 TYPE_LITERAL_PARSERS = {
1220 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
1221 }
1222
1223 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}
1224
1225 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
1226
1227 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
1228
1229 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
1230 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
1231 "ISOLATION": (
1232 ("LEVEL", "REPEATABLE", "READ"),
1233 ("LEVEL", "READ", "COMMITTED"),
1234 ("LEVEL", "READ", "UNCOMMITTED"),
1235 ("LEVEL", "SERIALIZABLE"),
1236 ),
1237 "READ": ("WRITE", "ONLY"),
1238 }
1239
1240 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
1241 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
1242 )
1243 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
1244
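# The OPTIONS_TYPE tables above map a leading keyword to the keyword sequences
# that may follow it; an empty tuple means the keyword stands alone. A brief
# sketch of TRANSACTION_CHARACTERISTICS in action (assuming sqlglot's public
# parse_one helper and the default dialect; "LEVEL READ COMMITTED" matches the
# ("LEVEL", "READ", "COMMITTED") sequence registered under "ISOLATION"):
#
#   import sqlglot
#
#   stmt = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
#   # stmt should be an exp.Set whose SetItem carries the matched characteristic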
"NOCYCLE", 1260 "NOMINVALUE", 1261 "NOMAXVALUE", 1262 "NOSCALE", 1263 "NOSHARD", 1264 ), 1265 tuple(), 1266 ), 1267 } 1268 1269 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1270 1271 USABLES: OPTIONS_TYPE = dict.fromkeys( 1272 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1273 ) 1274 1275 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1276 1277 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1278 "TYPE": ("EVOLUTION",), 1279 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1280 } 1281 1282 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1283 1284 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1285 1286 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1287 "NOT": ("ENFORCED",), 1288 "MATCH": ( 1289 "FULL", 1290 "PARTIAL", 1291 "SIMPLE", 1292 ), 1293 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1294 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1295 } 1296 1297 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1298 1299 CLONE_KEYWORDS = {"CLONE", "COPY"} 1300 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1301 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1302 1303 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1304 1305 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1306 1307 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1308 1309 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1310 1311 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1312 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1313 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1314 1315 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1316 1317 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1318 1319 ADD_CONSTRAINT_TOKENS = { 1320 TokenType.CONSTRAINT, 1321 TokenType.FOREIGN_KEY, 1322 TokenType.INDEX, 1323 TokenType.KEY, 1324 TokenType.PRIMARY_KEY, 1325 TokenType.UNIQUE, 1326 } 1327 1328 DISTINCT_TOKENS = {TokenType.DISTINCT} 1329 1330 NULL_TOKENS = {TokenType.NULL} 1331 1332 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1333 1334 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1335 1336 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1337 1338 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1339 1340 ODBC_DATETIME_LITERALS = { 1341 "d": exp.Date, 1342 "t": exp.Time, 1343 "ts": exp.Timestamp, 1344 } 1345 1346 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1347 1348 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1349 1350 # The style options for the DESCRIBE statement 1351 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1352 1353 # The style options for the ANALYZE statement 1354 ANALYZE_STYLES = { 1355 "BUFFER_USAGE_LIMIT", 1356 "FULL", 1357 "LOCAL", 1358 "NO_WRITE_TO_BINLOG", 1359 "SAMPLE", 1360 "SKIP_LOCKED", 1361 "VERBOSE", 1362 } 1363 1364 ANALYZE_EXPRESSION_PARSERS = { 1365 "ALL": lambda self: self._parse_analyze_columns(), 1366 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1367 "DELETE": lambda self: self._parse_analyze_delete(), 1368 "DROP": lambda self: self._parse_analyze_histogram(), 1369 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1370 "LIST": lambda self: self._parse_analyze_list(), 1371 "PREDICATE": lambda self: 
1371 "PREDICATE": lambda self: self._parse_analyze_columns(),
1372 "UPDATE": lambda self: self._parse_analyze_histogram(),
1373 "VALIDATE": lambda self: self._parse_analyze_validate(),
1374 }
1375
1376 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}
1377
1378 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)
1379
1380 OPERATION_MODIFIERS: t.Set[str] = set()
1381
1382 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}
1383
1384 STRICT_CAST = True
1385
1386 PREFIXED_PIVOT_COLUMNS = False
1387 IDENTIFY_PIVOT_STRINGS = False
1388
1389 LOG_DEFAULTS_TO_LN = False
1390
1391 # Whether ADD is present for each column added by ALTER TABLE
1392 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True
1393
1394 # Whether the table sample clause expects CSV syntax
1395 TABLESAMPLE_CSV = False
1396
1397 # The default method used for table sampling
1398 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None
1399
1400 # Whether the SET command needs a delimiter (e.g. "=") for assignments
1401 SET_REQUIRES_ASSIGNMENT_DELIMITER = True
1402
1403 # Whether the TRIM function expects the characters to trim as its first argument
1404 TRIM_PATTERN_FIRST = False
1405
1406 # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
1407 STRING_ALIASES = False
1408
1409 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
1410 MODIFIERS_ATTACHED_TO_SET_OP = True
1411 SET_OP_MODIFIERS = {"order", "limit", "offset"}
1412
1413 # Whether to parse IF statements that aren't followed by a left parenthesis as commands
1414 NO_PAREN_IF_COMMANDS = True
1415
1416 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
1417 JSON_ARROWS_REQUIRE_JSON_TYPE = False
1418
1419 # Whether the `:` operator is used to extract a value from a VARIANT column
1420 COLON_IS_VARIANT_EXTRACT = False
1421
1422 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
1423 # If this is True and '(' is not found, the keyword will be treated as an identifier
1424 VALUES_FOLLOWED_BY_PAREN = True
1425
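# The class-level flags above and below are grammar switches that dialect
# Parser subclasses override. A minimal sketch of the mechanism (the dialect
# name "Custom" is hypothetical; defining a Dialect subclass registers it, so
# parse_one can use it directly):
#
#   import sqlglot
#   from sqlglot import parser
#   from sqlglot.dialects.dialect import Dialect
#
#   class Custom(Dialect):
#       class Parser(parser.Parser):
#           # Accept a string literal as an alias, as in SELECT COUNT(*) 'count'
#           STRING_ALIASES = True
#
#   sqlglot.parse_one("SELECT COUNT(*) 'count'", read=Custom)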
1426 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
1427 SUPPORTS_IMPLICIT_UNNEST = False
1428
1429 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
1430 INTERVAL_SPANS = True
1431
1432 # Whether a PARTITION clause can follow a table reference
1433 SUPPORTS_PARTITION_SELECTION = False
1434
1435 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
1436 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True
1437
1438 # Whether the 'AS' keyword is optional in the CTE definition syntax
1439 OPTIONAL_ALIAS_TOKEN_CTE = True
1440
1441 __slots__ = (
1442 "error_level",
1443 "error_message_context",
1444 "max_errors",
1445 "dialect",
1446 "sql",
1447 "errors",
1448 "_tokens",
1449 "_index",
1450 "_curr",
1451 "_next",
1452 "_prev",
1453 "_prev_comments",
1454 )
1455
1456 # Autofilled
1457 SHOW_TRIE: t.Dict = {}
1458 SET_TRIE: t.Dict = {}
1459
1460 def __init__(
1461 self,
1462 error_level: t.Optional[ErrorLevel] = None,
1463 error_message_context: int = 100,
1464 max_errors: int = 3,
1465 dialect: DialectType = None,
1466 ):
1467 from sqlglot.dialects import Dialect
1468
1469 self.error_level = error_level or ErrorLevel.IMMEDIATE
1470 self.error_message_context = error_message_context
1471 self.max_errors = max_errors
1472 self.dialect = Dialect.get_or_raise(dialect)
1473 self.reset()
1474
1475 def reset(self):
1476 self.sql = ""
1477 self.errors = []
1478 self._tokens = []
1479 self._index = 0
1480 self._curr = None
1481 self._next = None
1482 self._prev = None
1483 self._prev_comments = None
1484
1485 def parse(
1486 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
1487 ) -> t.List[t.Optional[exp.Expression]]:
1488 """
1489 Parses a list of tokens and returns a list of syntax trees, one tree
1490 per parsed SQL statement.
1491
1492 Args:
1493 raw_tokens: The list of tokens.
1494 sql: The original SQL string, used to produce helpful debug messages.
1495
1496 Returns:
1497 The list of the produced syntax trees.
1498 """
1499 return self._parse(
1500 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
1501 )
1502
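# A short usage sketch for parse(): tokenize first, then hand the token list
# to the parser (assuming the default dialect's tokenizer; both imports are
# from this package):
#
#   from sqlglot.parser import Parser
#   from sqlglot.tokens import Tokenizer
#
#   sql = "SELECT 1; SELECT 2"
#   tokens = Tokenizer().tokenize(sql)
#   trees = Parser().parse(tokens, sql=sql)
#   # -> one exp.Select tree per semicolon-separated statement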
1503 def parse_into(
1504 self,
1505 expression_types: exp.IntoType,
1506 raw_tokens: t.List[Token],
1507 sql: t.Optional[str] = None,
1508 ) -> t.List[t.Optional[exp.Expression]]:
1509 """
1510 Parses a list of tokens into a given Expression type. If a collection of Expression
1511 types is given instead, this method will try to parse the token list into each one
1512 of them, stopping at the first for which the parsing succeeds.
1513
1514 Args:
1515 expression_types: The expression type(s) to try and parse the token list into.
1516 raw_tokens: The list of tokens.
1517 sql: The original SQL string, used to produce helpful debug messages.
1518
1519 Returns:
1520 The target Expression.
1521 """
1522 errors = []
1523 for expression_type in ensure_list(expression_types):
1524 parser = self.EXPRESSION_PARSERS.get(expression_type)
1525 if not parser:
1526 raise TypeError(f"No parser registered for {expression_type}")
1527
1528 try:
1529 return self._parse(parser, raw_tokens, sql)
1530 except ParseError as e:
1531 e.errors[0]["into_expression"] = expression_type
1532 errors.append(e)
1533
1534 raise ParseError(
1535 f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
1536 errors=merge_errors(errors),
1537 ) from errors[-1]
1538
1539 def _parse(
1540 self,
1541 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
1542 raw_tokens: t.List[Token],
1543 sql: t.Optional[str] = None,
1544 ) -> t.List[t.Optional[exp.Expression]]:
1545 self.reset()
1546 self.sql = sql or ""
1547
1548 total = len(raw_tokens)
1549 chunks: t.List[t.List[Token]] = [[]]
1550
1551 for i, token in enumerate(raw_tokens):
1552 if token.token_type == TokenType.SEMICOLON:
1553 if token.comments:
1554 chunks.append([token])
1555
1556 if i < total - 1:
1557 chunks.append([])
1558 else:
1559 chunks[-1].append(token)
1560
1561 expressions = []
1562
1563 for tokens in chunks:
1564 self._index = -1
1565 self._tokens = tokens
1566 self._advance()
1567
1568 expressions.append(parse_method(self))
1569
1570 if self._index < len(self._tokens):
1571 self.raise_error("Invalid expression / Unexpected token")
1572
1573 self.check_errors()
1574
1575 return expressions
1576
1577 def check_errors(self) -> None:
1578 """Logs or raises any found errors, depending on the chosen error level setting."""
1579 if self.error_level == ErrorLevel.WARN:
1580 for error in self.errors:
1581 logger.error(str(error))
1582 elif self.error_level == ErrorLevel.RAISE and self.errors:
1583 raise ParseError(
1584 concat_messages(self.errors, self.max_errors),
1585 errors=merge_errors(self.errors),
1586 )
1587
1588 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
1589 """
1590 Appends an error to the list of recorded errors or raises it, depending on the chosen
1591 error level setting.
1592 """
1593 token = token or self._curr or self._prev or Token.string("")
1594 start = token.start
1595 end = token.end + 1
1596 start_context = self.sql[max(start - self.error_message_context, 0) : start]
1597 highlight = self.sql[start:end]
1598 end_context = self.sql[end : end + self.error_message_context]
1599
1600 error = ParseError.new(
1601 f"{message}. Line {token.line}, Col: {token.col}.\n"
1602 f" {start_context}\033[4m{highlight}\033[0m{end_context}",
1603 description=message,
1604 line=token.line,
1605 col=token.col,
1606 start_context=start_context,
1607 highlight=highlight,
1608 end_context=end_context,
1609 )
1610
1611 if self.error_level == ErrorLevel.IMMEDIATE:
1612 raise error
1613
1614 self.errors.append(error)
1615
1616 def expression(
1617 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
1618 ) -> E:
1619 """
1620 Creates a new, validated Expression.
1621
1622 Args:
1623 exp_class: The expression class to instantiate.
1624 comments: An optional list of comments to attach to the expression.
1625 kwargs: The arguments to set for the expression along with their respective values.
1626
1627 Returns:
1628 The target expression.
1629 """
1630 instance = exp_class(**kwargs)
1631 instance.add_comments(comments) if comments else self._add_comments(instance)
1632 return self.validate_expression(instance)
1633
1634 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
1635 if expression and self._prev_comments:
1636 expression.add_comments(self._prev_comments)
1637 self._prev_comments = None
1638
1639 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
1640 """
1641 Validates an Expression, making sure that all its mandatory arguments are set.
1642
1643 Args:
1644 expression: The expression to validate.
1645 args: An optional list of items that was used to instantiate the expression, if it's a Func.
1646
1647 Returns:
1648 The validated expression.
1649 """
1650 if self.error_level != ErrorLevel.IGNORE:
1651 for error_message in expression.error_messages(args):
1652 self.raise_error(error_message)
1653
1654 return expression
1655
1656 def _find_sql(self, start: Token, end: Token) -> str:
1657 return self.sql[start.start : end.end + 1]
1658
1659 def _is_connected(self) -> bool:
1660 return self._prev and self._curr and self._prev.end + 1 == self._curr.start
1661
1662 def _advance(self, times: int = 1) -> None:
1663 self._index += times
1664 self._curr = seq_get(self._tokens, self._index)
1665 self._next = seq_get(self._tokens, self._index + 1)
1666
1667 if self._index > 0:
1668 self._prev = self._tokens[self._index - 1]
1669 self._prev_comments = self._prev.comments
1670 else:
1671 self._prev = None
1672 self._prev_comments = None
1673
1674 def _retreat(self, index: int) -> None:
1675 if index != self._index:
1676 self._advance(index - self._index)
1677
1678 def _warn_unsupported(self) -> None:
1679 if len(self._tokens) <= 1:
1680 return
1681
1682 # We use _find_sql because self.sql may comprise multiple chunks, and we're only
1683 # interested in emitting a warning for the one currently being processed.
1684 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
1685
1686 logger.warning(
1687 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1688 )
1689
1690 def _parse_command(self) -> exp.Command:
1691 self._warn_unsupported()
1692 return self.expression(
1693 exp.Command,
1694 comments=self._prev_comments,
1695 this=self._prev.text.upper(),
1696 expression=self._parse_string(),
1697 )
1698
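# A minimal illustration of the Command fallback above (a sketch, assuming
# sqlglot's public parse_one helper and that EXPLAIN is tokenized as a
# command keyword in the default dialect):
#
#   import sqlglot
#
#   # The tokenizer folds everything after a command keyword into a single
#   # string token, so the parser emits an opaque exp.Command node instead of
#   # a fully structured tree:
#   cmd = sqlglot.parse_one("EXPLAIN SELECT 1")
#   # cmd.this == "EXPLAIN"; the rest survives as raw text in cmd.expression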
1699 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1700 """
1701 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1702 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1703 solve this by setting & resetting the parser state accordingly
1704 """
1705 index = self._index
1706 error_level = self.error_level
1707
1708 self.error_level = ErrorLevel.IMMEDIATE
1709 try:
1710 this = parse_method()
1711 except ParseError:
1712 this = None
1713 finally:
1714 if not this or retreat:
1715 self._retreat(index)
1716 self.error_level = error_level
1717
1718 return this
1719
1720 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1721 start = self._prev
1722 exists = self._parse_exists() if allow_exists else None
1723
1724 self._match(TokenType.ON)
1725
1726 materialized = self._match_text_seq("MATERIALIZED")
1727 kind = self._match_set(self.CREATABLES) and self._prev
1728 if not kind:
1729 return self._parse_as_command(start)
1730
1731 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1732 this = self._parse_user_defined_function(kind=kind.token_type)
1733 elif kind.token_type == TokenType.TABLE:
1734 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1735 elif kind.token_type == TokenType.COLUMN:
1736 this = self._parse_column()
1737 else:
1738 this = self._parse_id_var()
1739
1740 self._match(TokenType.IS)
1741
1742 return self.expression(
1743 exp.Comment,
1744 this=this,
1745 kind=kind.text,
1746 expression=self._parse_string(),
1747 exists=exists,
1748 materialized=materialized,
1749 )
1750
1751 def _parse_to_table(
1752 self,
1753 ) -> exp.ToTableProperty:
1754 table = self._parse_table_parts(schema=True)
1755 return self.expression(exp.ToTableProperty, this=table)
1756
1757 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1758 def _parse_ttl(self) -> exp.Expression:
1759 def _parse_ttl_action() -> t.Optional[exp.Expression]:
1760 this = self._parse_bitwise()
1761
1762 if self._match_text_seq("DELETE"):
1763 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1764 if self._match_text_seq("RECOMPRESS"):
1765 return self.expression(
1766 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1767 )
1768 if self._match_text_seq("TO", "DISK"):
1769 return self.expression(
1770 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1771 )
1772 if self._match_text_seq("TO", "VOLUME"):
1773 return self.expression(
1774 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1775 )
1776
1777 return this
1778
1779 expressions = self._parse_csv(_parse_ttl_action)
1780 where = self._parse_where()
1781 group = self._parse_group()
1782
1783 aggregates = None
1784 if group and self._match(TokenType.SET):
1785 aggregates = self._parse_csv(self._parse_set_item)
1786
1787 return self.expression(
1788 exp.MergeTreeTTL,
1789 expressions=expressions,
1790 where=where,
1791 group=group,
1792 aggregates=aggregates,
1793 )
1794
1795 def _parse_statement(self) -> t.Optional[exp.Expression]:
1796 if self._curr is None:
1797 return None
1798
1799 if self._match_set(self.STATEMENT_PARSERS):
1800 comments = self._prev_comments
1801 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1802 stmt.add_comments(comments, prepend=True)
1803 return stmt
1804
1805 if self._match_set(self.dialect.tokenizer.COMMANDS):
1806 return self._parse_command()
1807
1808 expression = self._parse_expression()
1809 expression = self._parse_set_operations(expression) if expression else self._parse_select()
1810 return self._parse_query_modifiers(expression)
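# _parse_statement drives the dispatch: the first token of each statement is
# looked up in STATEMENT_PARSERS, then checked against the dialect's command
# keywords, and only then treated as a bare expression/SELECT. A quick sketch
# (assuming sqlglot's public parse_one helper and the default dialect):
#
#   import sqlglot
#
#   drop = sqlglot.parse_one("DROP TABLE IF EXISTS t")
#   # A leading DROP token routes to _parse_drop below, yielding an exp.Drop
#   # with kind="TABLE" and exists=True.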
1811 1812 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1813 start = self._prev 1814 temporary = self._match(TokenType.TEMPORARY) 1815 materialized = self._match_text_seq("MATERIALIZED") 1816 1817 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1818 if not kind: 1819 return self._parse_as_command(start) 1820 1821 concurrently = self._match_text_seq("CONCURRENTLY") 1822 if_exists = exists or self._parse_exists() 1823 1824 if kind == "COLUMN": 1825 this = self._parse_column() 1826 else: 1827 this = self._parse_table_parts( 1828 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1829 ) 1830 1831 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1832 1833 if self._match(TokenType.L_PAREN, advance=False): 1834 expressions = self._parse_wrapped_csv(self._parse_types) 1835 else: 1836 expressions = None 1837 1838 return self.expression( 1839 exp.Drop, 1840 exists=if_exists, 1841 this=this, 1842 expressions=expressions, 1843 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1844 temporary=temporary, 1845 materialized=materialized, 1846 cascade=self._match_text_seq("CASCADE"), 1847 constraints=self._match_text_seq("CONSTRAINTS"), 1848 purge=self._match_text_seq("PURGE"), 1849 cluster=cluster, 1850 concurrently=concurrently, 1851 ) 1852 1853 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1854 return ( 1855 self._match_text_seq("IF") 1856 and (not not_ or self._match(TokenType.NOT)) 1857 and self._match(TokenType.EXISTS) 1858 ) 1859 1860 def _parse_create(self) -> exp.Create | exp.Command: 1861 # Note: this can't be None because we've matched a statement parser 1862 start = self._prev 1863 1864 replace = ( 1865 start.token_type == TokenType.REPLACE 1866 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1867 or self._match_pair(TokenType.OR, TokenType.ALTER) 1868 ) 1869 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1870 1871 unique = self._match(TokenType.UNIQUE) 1872 1873 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1874 clustered = True 1875 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1876 "COLUMNSTORE" 1877 ): 1878 clustered = False 1879 else: 1880 clustered = None 1881 1882 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1883 self._advance() 1884 1885 properties = None 1886 create_token = self._match_set(self.CREATABLES) and self._prev 1887 1888 if not create_token: 1889 # exp.Properties.Location.POST_CREATE 1890 properties = self._parse_properties() 1891 create_token = self._match_set(self.CREATABLES) and self._prev 1892 1893 if not properties or not create_token: 1894 return self._parse_as_command(start) 1895 1896 concurrently = self._match_text_seq("CONCURRENTLY") 1897 exists = self._parse_exists(not_=True) 1898 this = None 1899 expression: t.Optional[exp.Expression] = None 1900 indexes = None 1901 no_schema_binding = None 1902 begin = None 1903 end = None 1904 clone = None 1905 1906 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1907 nonlocal properties 1908 if properties and temp_props: 1909 properties.expressions.extend(temp_props.expressions) 1910 elif temp_props: 1911 properties = temp_props 1912 1913 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1914 this = self._parse_user_defined_function(kind=create_token.token_type) 1915 1916 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1917 
extend_props(self._parse_properties()) 1918 1919 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1920 extend_props(self._parse_properties()) 1921 1922 if not expression: 1923 if self._match(TokenType.COMMAND): 1924 expression = self._parse_as_command(self._prev) 1925 else: 1926 begin = self._match(TokenType.BEGIN) 1927 return_ = self._match_text_seq("RETURN") 1928 1929 if self._match(TokenType.STRING, advance=False): 1930 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1931 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1932 expression = self._parse_string() 1933 extend_props(self._parse_properties()) 1934 else: 1935 expression = self._parse_user_defined_function_expression() 1936 1937 end = self._match_text_seq("END") 1938 1939 if return_: 1940 expression = self.expression(exp.Return, this=expression) 1941 elif create_token.token_type == TokenType.INDEX: 1942 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1943 if not self._match(TokenType.ON): 1944 index = self._parse_id_var() 1945 anonymous = False 1946 else: 1947 index = None 1948 anonymous = True 1949 1950 this = self._parse_index(index=index, anonymous=anonymous) 1951 elif create_token.token_type in self.DB_CREATABLES: 1952 table_parts = self._parse_table_parts( 1953 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1954 ) 1955 1956 # exp.Properties.Location.POST_NAME 1957 self._match(TokenType.COMMA) 1958 extend_props(self._parse_properties(before=True)) 1959 1960 this = self._parse_schema(this=table_parts) 1961 1962 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1963 extend_props(self._parse_properties()) 1964 1965 self._match(TokenType.ALIAS) 1966 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1967 # exp.Properties.Location.POST_ALIAS 1968 extend_props(self._parse_properties()) 1969 1970 if create_token.token_type == TokenType.SEQUENCE: 1971 expression = self._parse_types() 1972 extend_props(self._parse_properties()) 1973 else: 1974 expression = self._parse_ddl_select() 1975 1976 if create_token.token_type == TokenType.TABLE: 1977 # exp.Properties.Location.POST_EXPRESSION 1978 extend_props(self._parse_properties()) 1979 1980 indexes = [] 1981 while True: 1982 index = self._parse_index() 1983 1984 # exp.Properties.Location.POST_INDEX 1985 extend_props(self._parse_properties()) 1986 if not index: 1987 break 1988 else: 1989 self._match(TokenType.COMMA) 1990 indexes.append(index) 1991 elif create_token.token_type == TokenType.VIEW: 1992 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1993 no_schema_binding = True 1994 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1995 extend_props(self._parse_properties()) 1996 1997 shallow = self._match_text_seq("SHALLOW") 1998 1999 if self._match_texts(self.CLONE_KEYWORDS): 2000 copy = self._prev.text.lower() == "copy" 2001 clone = self.expression( 2002 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2003 ) 2004 2005 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2006 return self._parse_as_command(start) 2007 2008 create_kind_text = create_token.text.upper() 2009 return self.expression( 2010 exp.Create, 2011 this=this, 2012 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2013 replace=replace, 2014 refresh=refresh, 2015 unique=unique, 2016 expression=expression, 
2017 exists=exists, 2018 properties=properties, 2019 indexes=indexes, 2020 no_schema_binding=no_schema_binding, 2021 begin=begin, 2022 end=end, 2023 clone=clone, 2024 concurrently=concurrently, 2025 clustered=clustered, 2026 ) 2027 2028 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2029 seq = exp.SequenceProperties() 2030 2031 options = [] 2032 index = self._index 2033 2034 while self._curr: 2035 self._match(TokenType.COMMA) 2036 if self._match_text_seq("INCREMENT"): 2037 self._match_text_seq("BY") 2038 self._match_text_seq("=") 2039 seq.set("increment", self._parse_term()) 2040 elif self._match_text_seq("MINVALUE"): 2041 seq.set("minvalue", self._parse_term()) 2042 elif self._match_text_seq("MAXVALUE"): 2043 seq.set("maxvalue", self._parse_term()) 2044 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2045 self._match_text_seq("=") 2046 seq.set("start", self._parse_term()) 2047 elif self._match_text_seq("CACHE"): 2048 # T-SQL allows empty CACHE which is initialized dynamically 2049 seq.set("cache", self._parse_number() or True) 2050 elif self._match_text_seq("OWNED", "BY"): 2051 # "OWNED BY NONE" is the default 2052 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2053 else: 2054 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2055 if opt: 2056 options.append(opt) 2057 else: 2058 break 2059 2060 seq.set("options", options if options else None) 2061 return None if self._index == index else seq 2062 2063 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2064 # only used for teradata currently 2065 self._match(TokenType.COMMA) 2066 2067 kwargs = { 2068 "no": self._match_text_seq("NO"), 2069 "dual": self._match_text_seq("DUAL"), 2070 "before": self._match_text_seq("BEFORE"), 2071 "default": self._match_text_seq("DEFAULT"), 2072 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2073 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2074 "after": self._match_text_seq("AFTER"), 2075 "minimum": self._match_texts(("MIN", "MINIMUM")), 2076 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2077 } 2078 2079 if self._match_texts(self.PROPERTY_PARSERS): 2080 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2081 try: 2082 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2083 except TypeError: 2084 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2085 2086 return None 2087 2088 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2089 return self._parse_wrapped_csv(self._parse_property) 2090 2091 def _parse_property(self) -> t.Optional[exp.Expression]: 2092 if self._match_texts(self.PROPERTY_PARSERS): 2093 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2094 2095 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2096 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2097 2098 if self._match_text_seq("COMPOUND", "SORTKEY"): 2099 return self._parse_sortkey(compound=True) 2100 2101 if self._match_text_seq("SQL", "SECURITY"): 2102 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2103 2104 index = self._index 2105 key = self._parse_column() 2106 2107 if not self._match(TokenType.EQ): 2108 self._retreat(index) 2109 return self._parse_sequence_properties() 2110 2111 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2112 if isinstance(key, exp.Column): 2113 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2114 2115 value = self._parse_bitwise() or self._parse_var(any_token=True) 2116 2117 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2118 if isinstance(value, exp.Column): 2119 value = exp.var(value.name) 2120 2121 return self.expression(exp.Property, this=key, value=value) 2122 2123 def _parse_stored(self) -> exp.FileFormatProperty: 2124 self._match(TokenType.ALIAS) 2125 2126 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2127 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2128 2129 return self.expression( 2130 exp.FileFormatProperty, 2131 this=( 2132 self.expression( 2133 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2134 ) 2135 if input_format or output_format 2136 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2137 ), 2138 ) 2139 2140 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2141 field = self._parse_field() 2142 if isinstance(field, exp.Identifier) and not field.quoted: 2143 field = exp.var(field) 2144 2145 return field 2146 2147 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2148 self._match(TokenType.EQ) 2149 self._match(TokenType.ALIAS) 2150 2151 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2152 2153 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2154 properties = [] 2155 while True: 2156 if before: 2157 prop = self._parse_property_before() 2158 else: 2159 prop = self._parse_property() 2160 if not prop: 2161 break 2162 for p in ensure_list(prop): 2163 properties.append(p) 2164 2165 if properties: 2166 return self.expression(exp.Properties, expressions=properties) 2167 2168 return None 2169 2170 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2171 return self.expression( 2172 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2173 ) 2174 2175 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2176 if self._match_texts(("DEFINER", "INVOKER")): 2177 security_specifier = self._prev.text.upper() 2178 return self.expression(exp.SecurityProperty, this=security_specifier) 2179 return None 2180 2181 def _parse_settings_property(self) -> exp.SettingsProperty: 2182 return self.expression( 2183 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2184 ) 2185 2186 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2187 if self._index >= 2: 2188 pre_volatile_token = self._tokens[self._index - 2] 2189 else: 2190 pre_volatile_token = None 2191 2192 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2193 return exp.VolatileProperty() 2194 2195 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2196 2197 def _parse_retention_period(self) -> exp.Var: 2198 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2199 number = self._parse_number() 2200 number_str = f"{number} " if number else "" 2201 unit = self._parse_var(any_token=True) 2202 return exp.var(f"{number_str}{unit}") 2203 2204 def _parse_system_versioning_property( 2205 self, with_: bool = False 2206 ) -> exp.WithSystemVersioningProperty: 2207 self._match(TokenType.EQ) 2208 prop = self.expression( 2209 exp.WithSystemVersioningProperty, 2210 **{ # type: ignore 2211 "on": 
True, 2212 "with": with_, 2213 }, 2214 ) 2215 2216 if self._match_text_seq("OFF"): 2217 prop.set("on", False) 2218 return prop 2219 2220 self._match(TokenType.ON) 2221 if self._match(TokenType.L_PAREN): 2222 while self._curr and not self._match(TokenType.R_PAREN): 2223 if self._match_text_seq("HISTORY_TABLE", "="): 2224 prop.set("this", self._parse_table_parts()) 2225 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2226 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2227 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2228 prop.set("retention_period", self._parse_retention_period()) 2229 2230 self._match(TokenType.COMMA) 2231 2232 return prop 2233 2234 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2235 self._match(TokenType.EQ) 2236 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2237 prop = self.expression(exp.DataDeletionProperty, on=on) 2238 2239 if self._match(TokenType.L_PAREN): 2240 while self._curr and not self._match(TokenType.R_PAREN): 2241 if self._match_text_seq("FILTER_COLUMN", "="): 2242 prop.set("filter_column", self._parse_column()) 2243 elif self._match_text_seq("RETENTION_PERIOD", "="): 2244 prop.set("retention_period", self._parse_retention_period()) 2245 2246 self._match(TokenType.COMMA) 2247 2248 return prop 2249 2250 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2251 kind = "HASH" 2252 expressions: t.Optional[t.List[exp.Expression]] = None 2253 if self._match_text_seq("BY", "HASH"): 2254 expressions = self._parse_wrapped_csv(self._parse_id_var) 2255 elif self._match_text_seq("BY", "RANDOM"): 2256 kind = "RANDOM" 2257 2258 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2259 buckets: t.Optional[exp.Expression] = None 2260 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2261 buckets = self._parse_number() 2262 2263 return self.expression( 2264 exp.DistributedByProperty, 2265 expressions=expressions, 2266 kind=kind, 2267 buckets=buckets, 2268 order=self._parse_order(), 2269 ) 2270 2271 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2272 self._match_text_seq("KEY") 2273 expressions = self._parse_wrapped_id_vars() 2274 return self.expression(expr_type, expressions=expressions) 2275 2276 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2277 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2278 prop = self._parse_system_versioning_property(with_=True) 2279 self._match_r_paren() 2280 return prop 2281 2282 if self._match(TokenType.L_PAREN, advance=False): 2283 return self._parse_wrapped_properties() 2284 2285 if self._match_text_seq("JOURNAL"): 2286 return self._parse_withjournaltable() 2287 2288 if self._match_texts(self.VIEW_ATTRIBUTES): 2289 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2290 2291 if self._match_text_seq("DATA"): 2292 return self._parse_withdata(no=False) 2293 elif self._match_text_seq("NO", "DATA"): 2294 return self._parse_withdata(no=True) 2295 2296 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2297 return self._parse_serde_properties(with_=True) 2298 2299 if self._match(TokenType.SCHEMA): 2300 return self.expression( 2301 exp.WithSchemaBindingProperty, 2302 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2303 ) 2304 2305 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2306 return self.expression( 2307 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2308 ) 2309 2310 if not self._next: 2311 return None 2312 2313 return self._parse_withisolatedloading() 2314 2315 def _parse_procedure_option(self) -> exp.Expression | None: 2316 if self._match_text_seq("EXECUTE", "AS"): 2317 return self.expression( 2318 exp.ExecuteAsProperty, 2319 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2320 or self._parse_string(), 2321 ) 2322 2323 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2324 2325 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2326 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2327 self._match(TokenType.EQ) 2328 2329 user = self._parse_id_var() 2330 self._match(TokenType.PARAMETER) 2331 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2332 2333 if not user or not host: 2334 return None 2335 2336 return exp.DefinerProperty(this=f"{user}@{host}") 2337 2338 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2339 self._match(TokenType.TABLE) 2340 self._match(TokenType.EQ) 2341 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2342 2343 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2344 return self.expression(exp.LogProperty, no=no) 2345 2346 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2347 return self.expression(exp.JournalProperty, **kwargs) 2348 2349 def _parse_checksum(self) -> exp.ChecksumProperty: 2350 self._match(TokenType.EQ) 2351 2352 on = None 2353 if self._match(TokenType.ON): 2354 on = True 2355 elif self._match_text_seq("OFF"): 2356 on = False 2357 2358 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2359 2360 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2361 return self.expression( 2362 exp.Cluster, 2363 expressions=( 2364 self._parse_wrapped_csv(self._parse_ordered) 2365 if wrapped 2366 else self._parse_csv(self._parse_ordered) 2367 ), 2368 ) 2369 2370 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2371 self._match_text_seq("BY") 2372 2373 self._match_l_paren() 2374 expressions = self._parse_csv(self._parse_column) 2375 self._match_r_paren() 2376 2377 if self._match_text_seq("SORTED", "BY"): 2378 self._match_l_paren() 2379 sorted_by = self._parse_csv(self._parse_ordered) 2380 self._match_r_paren() 2381 else: 2382 sorted_by = None 2383 2384 self._match(TokenType.INTO) 2385 buckets = self._parse_number() 2386 self._match_text_seq("BUCKETS") 2387 2388 return self.expression( 2389 exp.ClusteredByProperty, 2390 expressions=expressions, 2391 sorted_by=sorted_by, 2392 buckets=buckets, 2393 ) 2394 2395 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2396 if not self._match_text_seq("GRANTS"): 2397 self._retreat(self._index - 1) 2398 return None 2399 2400 return self.expression(exp.CopyGrantsProperty) 2401 2402 def _parse_freespace(self) -> exp.FreespaceProperty: 2403 self._match(TokenType.EQ) 2404 return self.expression( 2405 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2406 ) 2407 2408 def _parse_mergeblockratio( 2409 self, no: bool = False, default: bool = False 2410 ) -> exp.MergeBlockRatioProperty: 2411 if self._match(TokenType.EQ): 2412 return self.expression( 2413 exp.MergeBlockRatioProperty, 2414 this=self._parse_number(), 2415 percent=self._match(TokenType.PERCENT), 2416 ) 2417 2418 return self.expression(exp.MergeBlockRatioProperty, no=no, 
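# Teradata: NO MERGEBLOCKRATIO / DEFAULT MERGEBLOCKRATIO / MERGEBLOCKRATIO = n [PERCENT]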
default=default) 2419 2420 def _parse_datablocksize( 2421 self, 2422 default: t.Optional[bool] = None, 2423 minimum: t.Optional[bool] = None, 2424 maximum: t.Optional[bool] = None, 2425 ) -> exp.DataBlocksizeProperty: 2426 self._match(TokenType.EQ) 2427 size = self._parse_number() 2428 2429 units = None 2430 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2431 units = self._prev.text 2432 2433 return self.expression( 2434 exp.DataBlocksizeProperty, 2435 size=size, 2436 units=units, 2437 default=default, 2438 minimum=minimum, 2439 maximum=maximum, 2440 ) 2441 2442 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2443 self._match(TokenType.EQ) 2444 always = self._match_text_seq("ALWAYS") 2445 manual = self._match_text_seq("MANUAL") 2446 never = self._match_text_seq("NEVER") 2447 default = self._match_text_seq("DEFAULT") 2448 2449 autotemp = None 2450 if self._match_text_seq("AUTOTEMP"): 2451 autotemp = self._parse_schema() 2452 2453 return self.expression( 2454 exp.BlockCompressionProperty, 2455 always=always, 2456 manual=manual, 2457 never=never, 2458 default=default, 2459 autotemp=autotemp, 2460 ) 2461 2462 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2463 index = self._index 2464 no = self._match_text_seq("NO") 2465 concurrent = self._match_text_seq("CONCURRENT") 2466 2467 if not self._match_text_seq("ISOLATED", "LOADING"): 2468 self._retreat(index) 2469 return None 2470 2471 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2472 return self.expression( 2473 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2474 ) 2475 2476 def _parse_locking(self) -> exp.LockingProperty: 2477 if self._match(TokenType.TABLE): 2478 kind = "TABLE" 2479 elif self._match(TokenType.VIEW): 2480 kind = "VIEW" 2481 elif self._match(TokenType.ROW): 2482 kind = "ROW" 2483 elif self._match_text_seq("DATABASE"): 2484 kind = "DATABASE" 2485 else: 2486 kind = None 2487 2488 if kind in ("DATABASE", "TABLE", "VIEW"): 2489 this = self._parse_table_parts() 2490 else: 2491 this = None 2492 2493 if self._match(TokenType.FOR): 2494 for_or_in = "FOR" 2495 elif self._match(TokenType.IN): 2496 for_or_in = "IN" 2497 else: 2498 for_or_in = None 2499 2500 if self._match_text_seq("ACCESS"): 2501 lock_type = "ACCESS" 2502 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2503 lock_type = "EXCLUSIVE" 2504 elif self._match_text_seq("SHARE"): 2505 lock_type = "SHARE" 2506 elif self._match_text_seq("READ"): 2507 lock_type = "READ" 2508 elif self._match_text_seq("WRITE"): 2509 lock_type = "WRITE" 2510 elif self._match_text_seq("CHECKSUM"): 2511 lock_type = "CHECKSUM" 2512 else: 2513 lock_type = None 2514 2515 override = self._match_text_seq("OVERRIDE") 2516 2517 return self.expression( 2518 exp.LockingProperty, 2519 this=this, 2520 kind=kind, 2521 for_or_in=for_or_in, 2522 lock_type=lock_type, 2523 override=override, 2524 ) 2525 2526 def _parse_partition_by(self) -> t.List[exp.Expression]: 2527 if self._match(TokenType.PARTITION_BY): 2528 return self._parse_csv(self._parse_assignment) 2529 return [] 2530 2531 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2532 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2533 if self._match_text_seq("MINVALUE"): 2534 return exp.var("MINVALUE") 2535 if self._match_text_seq("MAXVALUE"): 2536 return exp.var("MAXVALUE") 2537 return self._parse_bitwise() 2538 2539 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2540 expression = None 
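# Postgres partition bounds take one of three shapes, e.g.:
#   FOR VALUES IN (1, 2)
#   FOR VALUES FROM (MINVALUE) TO (10)
#   FOR VALUES WITH (MODULUS 4, REMAINDER 0)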
2541 from_expressions = None 2542 to_expressions = None 2543 2544 if self._match(TokenType.IN): 2545 this = self._parse_wrapped_csv(self._parse_bitwise) 2546 elif self._match(TokenType.FROM): 2547 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2548 self._match_text_seq("TO") 2549 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2550 elif self._match_text_seq("WITH", "(", "MODULUS"): 2551 this = self._parse_number() 2552 self._match_text_seq(",", "REMAINDER") 2553 expression = self._parse_number() 2554 self._match_r_paren() 2555 else: 2556 self.raise_error("Failed to parse partition bound spec.") 2557 2558 return self.expression( 2559 exp.PartitionBoundSpec, 2560 this=this, 2561 expression=expression, 2562 from_expressions=from_expressions, 2563 to_expressions=to_expressions, 2564 ) 2565 2566 # https://www.postgresql.org/docs/current/sql-createtable.html 2567 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2568 if not self._match_text_seq("OF"): 2569 self._retreat(self._index - 1) 2570 return None 2571 2572 this = self._parse_table(schema=True) 2573 2574 if self._match(TokenType.DEFAULT): 2575 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2576 elif self._match_text_seq("FOR", "VALUES"): 2577 expression = self._parse_partition_bound_spec() 2578 else: 2579 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2580 2581 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2582 2583 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2584 self._match(TokenType.EQ) 2585 return self.expression( 2586 exp.PartitionedByProperty, 2587 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2588 ) 2589 2590 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2591 if self._match_text_seq("AND", "STATISTICS"): 2592 statistics = True 2593 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2594 statistics = False 2595 else: 2596 statistics = None 2597 2598 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2599 2600 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2601 if self._match_text_seq("SQL"): 2602 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2603 return None 2604 2605 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2606 if self._match_text_seq("SQL", "DATA"): 2607 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2608 return None 2609 2610 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2611 if self._match_text_seq("PRIMARY", "INDEX"): 2612 return exp.NoPrimaryIndexProperty() 2613 if self._match_text_seq("SQL"): 2614 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2615 return None 2616 2617 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2618 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2619 return exp.OnCommitProperty() 2620 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2621 return exp.OnCommitProperty(delete=True) 2622 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2623 2624 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2625 if self._match_text_seq("SQL", "DATA"): 2626 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2627 return None 2628 2629 def _parse_distkey(self) -> exp.DistKeyProperty: 2630 return self.expression(exp.DistKeyProperty, 
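# Redshift: DISTKEY(col)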
this=self._parse_wrapped(self._parse_id_var)) 2631 2632 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2633 table = self._parse_table(schema=True) 2634 2635 options = [] 2636 while self._match_texts(("INCLUDING", "EXCLUDING")): 2637 this = self._prev.text.upper() 2638 2639 id_var = self._parse_id_var() 2640 if not id_var: 2641 return None 2642 2643 options.append( 2644 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2645 ) 2646 2647 return self.expression(exp.LikeProperty, this=table, expressions=options) 2648 2649 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2650 return self.expression( 2651 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2652 ) 2653 2654 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2655 self._match(TokenType.EQ) 2656 return self.expression( 2657 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2658 ) 2659 2660 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2661 self._match_text_seq("WITH", "CONNECTION") 2662 return self.expression( 2663 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2664 ) 2665 2666 def _parse_returns(self) -> exp.ReturnsProperty: 2667 value: t.Optional[exp.Expression] 2668 null = None 2669 is_table = self._match(TokenType.TABLE) 2670 2671 if is_table: 2672 if self._match(TokenType.LT): 2673 value = self.expression( 2674 exp.Schema, 2675 this="TABLE", 2676 expressions=self._parse_csv(self._parse_struct_types), 2677 ) 2678 if not self._match(TokenType.GT): 2679 self.raise_error("Expecting >") 2680 else: 2681 value = self._parse_schema(exp.var("TABLE")) 2682 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2683 null = True 2684 value = None 2685 else: 2686 value = self._parse_types() 2687 2688 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2689 2690 def _parse_describe(self) -> exp.Describe: 2691 kind = self._match_set(self.CREATABLES) and self._prev.text 2692 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2693 if self._match(TokenType.DOT): 2694 style = None 2695 self._retreat(self._index - 2) 2696 2697 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2698 2699 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2700 this = self._parse_statement() 2701 else: 2702 this = self._parse_table(schema=True) 2703 2704 properties = self._parse_properties() 2705 expressions = properties.expressions if properties else None 2706 partition = self._parse_partition() 2707 return self.expression( 2708 exp.Describe, 2709 this=this, 2710 style=style, 2711 kind=kind, 2712 expressions=expressions, 2713 partition=partition, 2714 format=format, 2715 ) 2716 2717 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2718 kind = self._prev.text.upper() 2719 expressions = [] 2720 2721 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2722 if self._match(TokenType.WHEN): 2723 expression = self._parse_disjunction() 2724 self._match(TokenType.THEN) 2725 else: 2726 expression = None 2727 2728 else_ = self._match(TokenType.ELSE) 2729 2730 if not self._match(TokenType.INTO): 2731 return None 2732 2733 return self.expression( 2734 exp.ConditionalInsert, 2735 this=self.expression( 2736 exp.Insert, 2737 this=self._parse_table(schema=True), 2738 
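# each branch is its own INSERT target, e.g. Oracle's
# WHEN cond THEN INTO t (c1) VALUES (v1)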
expression=self._parse_derived_table_values(), 2739 ), 2740 expression=expression, 2741 else_=else_, 2742 ) 2743 2744 expression = parse_conditional_insert() 2745 while expression is not None: 2746 expressions.append(expression) 2747 expression = parse_conditional_insert() 2748 2749 return self.expression( 2750 exp.MultitableInserts, 2751 kind=kind, 2752 comments=comments, 2753 expressions=expressions, 2754 source=self._parse_table(), 2755 ) 2756 2757 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2758 comments = [] 2759 hint = self._parse_hint() 2760 overwrite = self._match(TokenType.OVERWRITE) 2761 ignore = self._match(TokenType.IGNORE) 2762 local = self._match_text_seq("LOCAL") 2763 alternative = None 2764 is_function = None 2765 2766 if self._match_text_seq("DIRECTORY"): 2767 this: t.Optional[exp.Expression] = self.expression( 2768 exp.Directory, 2769 this=self._parse_var_or_string(), 2770 local=local, 2771 row_format=self._parse_row_format(match_row=True), 2772 ) 2773 else: 2774 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2775 comments += ensure_list(self._prev_comments) 2776 return self._parse_multitable_inserts(comments) 2777 2778 if self._match(TokenType.OR): 2779 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2780 2781 self._match(TokenType.INTO) 2782 comments += ensure_list(self._prev_comments) 2783 self._match(TokenType.TABLE) 2784 is_function = self._match(TokenType.FUNCTION) 2785 2786 this = ( 2787 self._parse_table(schema=True, parse_partition=True) 2788 if not is_function 2789 else self._parse_function() 2790 ) 2791 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2792 this.set("alias", self._parse_table_alias()) 2793 2794 returning = self._parse_returning() 2795 2796 return self.expression( 2797 exp.Insert, 2798 comments=comments, 2799 hint=hint, 2800 is_function=is_function, 2801 this=this, 2802 stored=self._match_text_seq("STORED") and self._parse_stored(), 2803 by_name=self._match_text_seq("BY", "NAME"), 2804 exists=self._parse_exists(), 2805 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2806 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2807 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2808 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2809 conflict=self._parse_on_conflict(), 2810 returning=returning or self._parse_returning(), 2811 overwrite=overwrite, 2812 alternative=alternative, 2813 ignore=ignore, 2814 source=self._match(TokenType.TABLE) and self._parse_table(), 2815 ) 2816 2817 def _parse_kill(self) -> exp.Kill: 2818 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2819 2820 return self.expression( 2821 exp.Kill, 2822 this=self._parse_primary(), 2823 kind=kind, 2824 ) 2825 2826 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2827 conflict = self._match_text_seq("ON", "CONFLICT") 2828 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2829 2830 if not conflict and not duplicate: 2831 return None 2832 2833 conflict_keys = None 2834 constraint = None 2835 2836 if conflict: 2837 if self._match_text_seq("ON", "CONSTRAINT"): 2838 constraint = self._parse_id_var() 2839 elif self._match(TokenType.L_PAREN): 2840 conflict_keys = self._parse_csv(self._parse_id_var) 2841 self._match_r_paren() 2842 2843 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2844 if 
self._prev.token_type == TokenType.UPDATE: 2845 self._match(TokenType.SET) 2846 expressions = self._parse_csv(self._parse_equality) 2847 else: 2848 expressions = None 2849 2850 return self.expression( 2851 exp.OnConflict, 2852 duplicate=duplicate, 2853 expressions=expressions, 2854 action=action, 2855 conflict_keys=conflict_keys, 2856 constraint=constraint, 2857 where=self._parse_where(), 2858 ) 2859 2860 def _parse_returning(self) -> t.Optional[exp.Returning]: 2861 if not self._match(TokenType.RETURNING): 2862 return None 2863 return self.expression( 2864 exp.Returning, 2865 expressions=self._parse_csv(self._parse_expression), 2866 into=self._match(TokenType.INTO) and self._parse_table_part(), 2867 ) 2868 2869 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2870 if not self._match(TokenType.FORMAT): 2871 return None 2872 return self._parse_row_format() 2873 2874 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2875 index = self._index 2876 with_ = with_ or self._match_text_seq("WITH") 2877 2878 if not self._match(TokenType.SERDE_PROPERTIES): 2879 self._retreat(index) 2880 return None 2881 return self.expression( 2882 exp.SerdeProperties, 2883 **{ # type: ignore 2884 "expressions": self._parse_wrapped_properties(), 2885 "with": with_, 2886 }, 2887 ) 2888 2889 def _parse_row_format( 2890 self, match_row: bool = False 2891 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2892 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2893 return None 2894 2895 if self._match_text_seq("SERDE"): 2896 this = self._parse_string() 2897 2898 serde_properties = self._parse_serde_properties() 2899 2900 return self.expression( 2901 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2902 ) 2903 2904 self._match_text_seq("DELIMITED") 2905 2906 kwargs = {} 2907 2908 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2909 kwargs["fields"] = self._parse_string() 2910 if self._match_text_seq("ESCAPED", "BY"): 2911 kwargs["escaped"] = self._parse_string() 2912 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2913 kwargs["collection_items"] = self._parse_string() 2914 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2915 kwargs["map_keys"] = self._parse_string() 2916 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2917 kwargs["lines"] = self._parse_string() 2918 if self._match_text_seq("NULL", "DEFINED", "AS"): 2919 kwargs["null"] = self._parse_string() 2920 2921 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2922 2923 def _parse_load(self) -> exp.LoadData | exp.Command: 2924 if self._match_text_seq("DATA"): 2925 local = self._match_text_seq("LOCAL") 2926 self._match_text_seq("INPATH") 2927 inpath = self._parse_string() 2928 overwrite = self._match(TokenType.OVERWRITE) 2929 self._match_pair(TokenType.INTO, TokenType.TABLE) 2930 2931 return self.expression( 2932 exp.LoadData, 2933 this=self._parse_table(schema=True), 2934 local=local, 2935 overwrite=overwrite, 2936 inpath=inpath, 2937 partition=self._parse_partition(), 2938 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2939 serde=self._match_text_seq("SERDE") and self._parse_string(), 2940 ) 2941 return self._parse_as_command(self._prev) 2942 2943 def _parse_delete(self) -> exp.Delete: 2944 # This handles MySQL's "Multiple-Table Syntax" 2945 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2946 tables 
= None 2947 if not self._match(TokenType.FROM, advance=False): 2948 tables = self._parse_csv(self._parse_table) or None 2949 2950 returning = self._parse_returning() 2951 2952 return self.expression( 2953 exp.Delete, 2954 tables=tables, 2955 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2956 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2957 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2958 where=self._parse_where(), 2959 returning=returning or self._parse_returning(), 2960 limit=self._parse_limit(), 2961 ) 2962 2963 def _parse_update(self) -> exp.Update: 2964 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2965 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2966 returning = self._parse_returning() 2967 return self.expression( 2968 exp.Update, 2969 **{ # type: ignore 2970 "this": this, 2971 "expressions": expressions, 2972 "from": self._parse_from(joins=True), 2973 "where": self._parse_where(), 2974 "returning": returning or self._parse_returning(), 2975 "order": self._parse_order(), 2976 "limit": self._parse_limit(), 2977 }, 2978 ) 2979 2980 def _parse_use(self) -> exp.Use: 2981 return self.expression( 2982 exp.Use, 2983 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 2984 this=self._parse_table(schema=False), 2985 ) 2986 2987 def _parse_uncache(self) -> exp.Uncache: 2988 if not self._match(TokenType.TABLE): 2989 self.raise_error("Expecting TABLE after UNCACHE") 2990 2991 return self.expression( 2992 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2993 ) 2994 2995 def _parse_cache(self) -> exp.Cache: 2996 lazy = self._match_text_seq("LAZY") 2997 self._match(TokenType.TABLE) 2998 table = self._parse_table(schema=True) 2999 3000 options = [] 3001 if self._match_text_seq("OPTIONS"): 3002 self._match_l_paren() 3003 k = self._parse_string() 3004 self._match(TokenType.EQ) 3005 v = self._parse_string() 3006 options = [k, v] 3007 self._match_r_paren() 3008 3009 self._match(TokenType.ALIAS) 3010 return self.expression( 3011 exp.Cache, 3012 this=table, 3013 lazy=lazy, 3014 options=options, 3015 expression=self._parse_select(nested=True), 3016 ) 3017 3018 def _parse_partition(self) -> t.Optional[exp.Partition]: 3019 if not self._match_texts(self.PARTITION_KEYWORDS): 3020 return None 3021 3022 return self.expression( 3023 exp.Partition, 3024 subpartition=self._prev.text.upper() == "SUBPARTITION", 3025 expressions=self._parse_wrapped_csv(self._parse_assignment), 3026 ) 3027 3028 def _parse_value(self) -> t.Optional[exp.Tuple]: 3029 def _parse_value_expression() -> t.Optional[exp.Expression]: 3030 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3031 return exp.var(self._prev.text.upper()) 3032 return self._parse_expression() 3033 3034 if self._match(TokenType.L_PAREN): 3035 expressions = self._parse_csv(_parse_value_expression) 3036 self._match_r_paren() 3037 return self.expression(exp.Tuple, expressions=expressions) 3038 3039 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
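# Each bare scalar is its own single-column row, i.e. VALUES 1, 2 behaves like VALUES (1), (2).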
3040 expression = self._parse_expression() 3041 if expression: 3042 return self.expression(exp.Tuple, expressions=[expression]) 3043 return None 3044 3045 def _parse_projections(self) -> t.List[exp.Expression]: 3046 return self._parse_expressions() 3047 3048 def _parse_select( 3049 self, 3050 nested: bool = False, 3051 table: bool = False, 3052 parse_subquery_alias: bool = True, 3053 parse_set_operation: bool = True, 3054 ) -> t.Optional[exp.Expression]: 3055 cte = self._parse_with() 3056 3057 if cte: 3058 this = self._parse_statement() 3059 3060 if not this: 3061 self.raise_error("Failed to parse any statement following CTE") 3062 return cte 3063 3064 if "with" in this.arg_types: 3065 this.set("with", cte) 3066 else: 3067 self.raise_error(f"{this.key} does not support CTE") 3068 this = cte 3069 3070 return this 3071 3072 # duckdb supports leading with FROM x 3073 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3074 3075 if self._match(TokenType.SELECT): 3076 comments = self._prev_comments 3077 3078 hint = self._parse_hint() 3079 3080 if self._next and not self._next.token_type == TokenType.DOT: 3081 all_ = self._match(TokenType.ALL) 3082 distinct = self._match_set(self.DISTINCT_TOKENS) 3083 else: 3084 all_, distinct = None, None 3085 3086 kind = ( 3087 self._match(TokenType.ALIAS) 3088 and self._match_texts(("STRUCT", "VALUE")) 3089 and self._prev.text.upper() 3090 ) 3091 3092 if distinct: 3093 distinct = self.expression( 3094 exp.Distinct, 3095 on=self._parse_value() if self._match(TokenType.ON) else None, 3096 ) 3097 3098 if all_ and distinct: 3099 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3100 3101 operation_modifiers = [] 3102 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3103 operation_modifiers.append(exp.var(self._prev.text.upper())) 3104 3105 limit = self._parse_limit(top=True) 3106 projections = self._parse_projections() 3107 3108 this = self.expression( 3109 exp.Select, 3110 kind=kind, 3111 hint=hint, 3112 distinct=distinct, 3113 expressions=projections, 3114 limit=limit, 3115 operation_modifiers=operation_modifiers or None, 3116 ) 3117 this.comments = comments 3118 3119 into = self._parse_into() 3120 if into: 3121 this.set("into", into) 3122 3123 if not from_: 3124 from_ = self._parse_from() 3125 3126 if from_: 3127 this.set("from", from_) 3128 3129 this = self._parse_query_modifiers(this) 3130 elif (table or nested) and self._match(TokenType.L_PAREN): 3131 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3132 this = self._parse_simplified_pivot( 3133 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3134 ) 3135 elif self._match(TokenType.FROM): 3136 from_ = self._parse_from(skip_from_token=True) 3137 # Support parentheses for duckdb FROM-first syntax 3138 select = self._parse_select() 3139 if select: 3140 select.set("from", from_) 3141 this = select 3142 else: 3143 this = exp.select("*").from_(t.cast(exp.From, from_)) 3144 else: 3145 this = ( 3146 self._parse_table() 3147 if table 3148 else self._parse_select(nested=True, parse_set_operation=False) 3149 ) 3150 3151 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3152 # in case a modifier (e.g. 
join) is following 3153 if table and isinstance(this, exp.Values) and this.alias: 3154 alias = this.args["alias"].pop() 3155 this = exp.Table(this=this, alias=alias) 3156 3157 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3158 3159 self._match_r_paren() 3160 3161 # We return early here so that the UNION isn't attached to the subquery by the 3162 # following call to _parse_set_operations, but instead becomes the parent node 3163 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3164 elif self._match(TokenType.VALUES, advance=False): 3165 this = self._parse_derived_table_values() 3166 elif from_: 3167 this = exp.select("*").from_(from_.this, copy=False) 3168 elif self._match(TokenType.SUMMARIZE): 3169 table = self._match(TokenType.TABLE) 3170 this = self._parse_select() or self._parse_string() or self._parse_table() 3171 return self.expression(exp.Summarize, this=this, table=table) 3172 elif self._match(TokenType.DESCRIBE): 3173 this = self._parse_describe() 3174 elif self._match_text_seq("STREAM"): 3175 this = self._parse_function() 3176 if this: 3177 this = self.expression(exp.Stream, this=this) 3178 else: 3179 self._retreat(self._index - 1) 3180 else: 3181 this = None 3182 3183 return self._parse_set_operations(this) if parse_set_operation else this 3184 3185 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3186 self._match_text_seq("SEARCH") 3187 3188 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3189 3190 if not kind: 3191 return None 3192 3193 self._match_text_seq("FIRST", "BY") 3194 3195 return self.expression( 3196 exp.RecursiveWithSearch, 3197 kind=kind, 3198 this=self._parse_id_var(), 3199 expression=self._match_text_seq("SET") and self._parse_id_var(), 3200 using=self._match_text_seq("USING") and self._parse_id_var(), 3201 ) 3202 3203 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3204 if not skip_with_token and not self._match(TokenType.WITH): 3205 return None 3206 3207 comments = self._prev_comments 3208 recursive = self._match(TokenType.RECURSIVE) 3209 3210 last_comments = None 3211 expressions = [] 3212 while True: 3213 cte = self._parse_cte() 3214 if isinstance(cte, exp.CTE): 3215 expressions.append(cte) 3216 if last_comments: 3217 cte.add_comments(last_comments) 3218 3219 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3220 break 3221 else: 3222 self._match(TokenType.WITH) 3223 3224 last_comments = self._prev_comments 3225 3226 return self.expression( 3227 exp.With, 3228 comments=comments, 3229 expressions=expressions, 3230 recursive=recursive, 3231 search=self._parse_recursive_with_search(), 3232 ) 3233 3234 def _parse_cte(self) -> t.Optional[exp.CTE]: 3235 index = self._index 3236 3237 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3238 if not alias or not alias.this: 3239 self.raise_error("Expected CTE to have alias") 3240 3241 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3242 self._retreat(index) 3243 return None 3244 3245 comments = self._prev_comments 3246 3247 if self._match_text_seq("NOT", "MATERIALIZED"): 3248 materialized = False 3249 elif self._match_text_seq("MATERIALIZED"): 3250 materialized = True 3251 else: 3252 materialized = None 3253 3254 cte = self.expression( 3255 exp.CTE, 3256 this=self._parse_wrapped(self._parse_statement), 3257 alias=alias, 3258 materialized=materialized, 3259 comments=comments, 3260 ) 3261 3262 if isinstance(cte.this, exp.Values): 3263 
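# Wrap a bare VALUES CTE in a SELECT so downstream logic always sees a query,
# e.g. WITH t AS (VALUES (1)) becomes WITH t AS (SELECT * FROM (VALUES (1)) AS _values)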
cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3264 3265 return cte 3266 3267 def _parse_table_alias( 3268 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3269 ) -> t.Optional[exp.TableAlias]: 3270 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3271 # so this section tries to parse the clause version and if it fails, it treats the token 3272 # as an identifier (alias) 3273 if self._can_parse_limit_or_offset(): 3274 return None 3275 3276 any_token = self._match(TokenType.ALIAS) 3277 alias = ( 3278 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3279 or self._parse_string_as_identifier() 3280 ) 3281 3282 index = self._index 3283 if self._match(TokenType.L_PAREN): 3284 columns = self._parse_csv(self._parse_function_parameter) 3285 self._match_r_paren() if columns else self._retreat(index) 3286 else: 3287 columns = None 3288 3289 if not alias and not columns: 3290 return None 3291 3292 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3293 3294 # We bubble up comments from the Identifier to the TableAlias 3295 if isinstance(alias, exp.Identifier): 3296 table_alias.add_comments(alias.pop_comments()) 3297 3298 return table_alias 3299 3300 def _parse_subquery( 3301 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3302 ) -> t.Optional[exp.Subquery]: 3303 if not this: 3304 return None 3305 3306 return self.expression( 3307 exp.Subquery, 3308 this=this, 3309 pivots=self._parse_pivots(), 3310 alias=self._parse_table_alias() if parse_alias else None, 3311 sample=self._parse_table_sample(), 3312 ) 3313 3314 def _implicit_unnests_to_explicit(self, this: E) -> E: 3315 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3316 3317 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3318 for i, join in enumerate(this.args.get("joins") or []): 3319 table = join.this 3320 normalized_table = table.copy() 3321 normalized_table.meta["maybe_column"] = True 3322 normalized_table = _norm(normalized_table, dialect=self.dialect) 3323 3324 if isinstance(table, exp.Table) and not join.args.get("on"): 3325 if normalized_table.parts[0].name in refs: 3326 table_as_column = table.to_column() 3327 unnest = exp.Unnest(expressions=[table_as_column]) 3328 3329 # Table.to_column creates a parent Alias node that we want to convert to 3330 # a TableAlias and attach to the Unnest, so it matches the parser's output 3331 if isinstance(table.args.get("alias"), exp.TableAlias): 3332 table_as_column.replace(table_as_column.this) 3333 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3334 3335 table.replace(unnest) 3336 3337 refs.add(normalized_table.alias_or_name) 3338 3339 return this 3340 3341 def _parse_query_modifiers( 3342 self, this: t.Optional[exp.Expression] 3343 ) -> t.Optional[exp.Expression]: 3344 if isinstance(this, (exp.Query, exp.Table)): 3345 for join in self._parse_joins(): 3346 this.append("joins", join) 3347 for lateral in iter(self._parse_lateral, None): 3348 this.append("laterals", lateral) 3349 3350 while True: 3351 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3352 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3353 key, expression = parser(self) 3354 3355 if expression: 3356 this.set(key, expression) 3357 if key == "limit": 3358 offset = expression.args.pop("offset", None) 3359 3360 if offset: 3361 offset = 
exp.Offset(expression=offset) 3362 this.set("offset", offset) 3363 3364 limit_by_expressions = expression.expressions 3365 expression.set("expressions", None) 3366 offset.set("expressions", limit_by_expressions) 3367 continue 3368 break 3369 3370 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3371 this = self._implicit_unnests_to_explicit(this) 3372 3373 return this 3374 3375 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3376 start = self._curr 3377 while self._curr: 3378 self._advance() 3379 3380 end = self._tokens[self._index - 1] 3381 return exp.Hint(expressions=[self._find_sql(start, end)]) 3382 3383 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3384 return self._parse_function_call() 3385 3386 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3387 start_index = self._index 3388 should_fallback_to_string = False 3389 3390 hints = [] 3391 try: 3392 for hint in iter( 3393 lambda: self._parse_csv( 3394 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3395 ), 3396 [], 3397 ): 3398 hints.extend(hint) 3399 except ParseError: 3400 should_fallback_to_string = True 3401 3402 if should_fallback_to_string or self._curr: 3403 self._retreat(start_index) 3404 return self._parse_hint_fallback_to_string() 3405 3406 return self.expression(exp.Hint, expressions=hints) 3407 3408 def _parse_hint(self) -> t.Optional[exp.Hint]: 3409 if self._match(TokenType.HINT) and self._prev_comments: 3410 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3411 3412 return None 3413 3414 def _parse_into(self) -> t.Optional[exp.Into]: 3415 if not self._match(TokenType.INTO): 3416 return None 3417 3418 temp = self._match(TokenType.TEMPORARY) 3419 unlogged = self._match_text_seq("UNLOGGED") 3420 self._match(TokenType.TABLE) 3421 3422 return self.expression( 3423 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3424 ) 3425 3426 def _parse_from( 3427 self, joins: bool = False, skip_from_token: bool = False 3428 ) -> t.Optional[exp.From]: 3429 if not skip_from_token and not self._match(TokenType.FROM): 3430 return None 3431 3432 return self.expression( 3433 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3434 ) 3435 3436 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3437 return self.expression( 3438 exp.MatchRecognizeMeasure, 3439 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3440 this=self._parse_expression(), 3441 ) 3442 3443 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3444 if not self._match(TokenType.MATCH_RECOGNIZE): 3445 return None 3446 3447 self._match_l_paren() 3448 3449 partition = self._parse_partition_by() 3450 order = self._parse_order() 3451 3452 measures = ( 3453 self._parse_csv(self._parse_match_recognize_measure) 3454 if self._match_text_seq("MEASURES") 3455 else None 3456 ) 3457 3458 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3459 rows = exp.var("ONE ROW PER MATCH") 3460 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3461 text = "ALL ROWS PER MATCH" 3462 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3463 text += " SHOW EMPTY MATCHES" 3464 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3465 text += " OMIT EMPTY MATCHES" 3466 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3467 text += " WITH UNMATCHED ROWS" 3468 rows = exp.var(text) 3469 else: 3470 rows = None 3471 3472 if self._match_text_seq("AFTER", 
"MATCH", "SKIP"): 3473 text = "AFTER MATCH SKIP" 3474 if self._match_text_seq("PAST", "LAST", "ROW"): 3475 text += " PAST LAST ROW" 3476 elif self._match_text_seq("TO", "NEXT", "ROW"): 3477 text += " TO NEXT ROW" 3478 elif self._match_text_seq("TO", "FIRST"): 3479 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3480 elif self._match_text_seq("TO", "LAST"): 3481 text += f" TO LAST {self._advance_any().text}" # type: ignore 3482 after = exp.var(text) 3483 else: 3484 after = None 3485 3486 if self._match_text_seq("PATTERN"): 3487 self._match_l_paren() 3488 3489 if not self._curr: 3490 self.raise_error("Expecting )", self._curr) 3491 3492 paren = 1 3493 start = self._curr 3494 3495 while self._curr and paren > 0: 3496 if self._curr.token_type == TokenType.L_PAREN: 3497 paren += 1 3498 if self._curr.token_type == TokenType.R_PAREN: 3499 paren -= 1 3500 3501 end = self._prev 3502 self._advance() 3503 3504 if paren > 0: 3505 self.raise_error("Expecting )", self._curr) 3506 3507 pattern = exp.var(self._find_sql(start, end)) 3508 else: 3509 pattern = None 3510 3511 define = ( 3512 self._parse_csv(self._parse_name_as_expression) 3513 if self._match_text_seq("DEFINE") 3514 else None 3515 ) 3516 3517 self._match_r_paren() 3518 3519 return self.expression( 3520 exp.MatchRecognize, 3521 partition_by=partition, 3522 order=order, 3523 measures=measures, 3524 rows=rows, 3525 after=after, 3526 pattern=pattern, 3527 define=define, 3528 alias=self._parse_table_alias(), 3529 ) 3530 3531 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3532 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3533 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3534 cross_apply = False 3535 3536 if cross_apply is not None: 3537 this = self._parse_select(table=True) 3538 view = None 3539 outer = None 3540 elif self._match(TokenType.LATERAL): 3541 this = self._parse_select(table=True) 3542 view = self._match(TokenType.VIEW) 3543 outer = self._match(TokenType.OUTER) 3544 else: 3545 return None 3546 3547 if not this: 3548 this = ( 3549 self._parse_unnest() 3550 or self._parse_function() 3551 or self._parse_id_var(any_token=False) 3552 ) 3553 3554 while self._match(TokenType.DOT): 3555 this = exp.Dot( 3556 this=this, 3557 expression=self._parse_function() or self._parse_id_var(any_token=False), 3558 ) 3559 3560 if view: 3561 table = self._parse_id_var(any_token=False) 3562 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3563 table_alias: t.Optional[exp.TableAlias] = self.expression( 3564 exp.TableAlias, this=table, columns=columns 3565 ) 3566 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3567 # We move the alias from the lateral's child node to the lateral itself 3568 table_alias = this.args["alias"].pop() 3569 else: 3570 table_alias = self._parse_table_alias() 3571 3572 return self.expression( 3573 exp.Lateral, 3574 this=this, 3575 view=view, 3576 outer=outer, 3577 alias=table_alias, 3578 cross_apply=cross_apply, 3579 ) 3580 3581 def _parse_join_parts( 3582 self, 3583 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3584 return ( 3585 self._match_set(self.JOIN_METHODS) and self._prev, 3586 self._match_set(self.JOIN_SIDES) and self._prev, 3587 self._match_set(self.JOIN_KINDS) and self._prev, 3588 ) 3589 3590 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3591 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3592 this = self._parse_column() 3593 if isinstance(this, 
exp.Column): 3594 return this.this 3595 return this 3596 3597 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3598 3599 def _parse_join( 3600 self, skip_join_token: bool = False, parse_bracket: bool = False 3601 ) -> t.Optional[exp.Join]: 3602 if self._match(TokenType.COMMA): 3603 return self.expression(exp.Join, this=self._parse_table()) 3604 3605 index = self._index 3606 method, side, kind = self._parse_join_parts() 3607 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3608 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3609 3610 if not skip_join_token and not join: 3611 self._retreat(index) 3612 kind = None 3613 method = None 3614 side = None 3615 3616 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3617 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3618 3619 if not skip_join_token and not join and not outer_apply and not cross_apply: 3620 return None 3621 3622 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3623 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3624 kwargs["expressions"] = self._parse_csv( 3625 lambda: self._parse_table(parse_bracket=parse_bracket) 3626 ) 3627 3628 if method: 3629 kwargs["method"] = method.text 3630 if side: 3631 kwargs["side"] = side.text 3632 if kind: 3633 kwargs["kind"] = kind.text 3634 if hint: 3635 kwargs["hint"] = hint 3636 3637 if self._match(TokenType.MATCH_CONDITION): 3638 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3639 3640 if self._match(TokenType.ON): 3641 kwargs["on"] = self._parse_assignment() 3642 elif self._match(TokenType.USING): 3643 kwargs["using"] = self._parse_using_identifiers() 3644 elif ( 3645 not (outer_apply or cross_apply) 3646 and not isinstance(kwargs["this"], exp.Unnest) 3647 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3648 ): 3649 index = self._index 3650 joins: t.Optional[list] = list(self._parse_joins()) 3651 3652 if joins and self._match(TokenType.ON): 3653 kwargs["on"] = self._parse_assignment() 3654 elif joins and self._match(TokenType.USING): 3655 kwargs["using"] = self._parse_using_identifiers() 3656 else: 3657 joins = None 3658 self._retreat(index) 3659 3660 kwargs["this"].set("joins", joins if joins else None) 3661 3662 comments = [c for token in (method, side, kind) if token for c in token.comments] 3663 return self.expression(exp.Join, comments=comments, **kwargs) 3664 3665 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3666 this = self._parse_assignment() 3667 3668 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3669 return this 3670 3671 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3672 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3673 3674 return this 3675 3676 def _parse_index_params(self) -> exp.IndexParameters: 3677 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3678 3679 if self._match(TokenType.L_PAREN, advance=False): 3680 columns = self._parse_wrapped_csv(self._parse_with_operator) 3681 else: 3682 columns = None 3683 3684 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3685 partition_by = self._parse_partition_by() 3686 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3687 tablespace = ( 3688 self._parse_var(any_token=True) 3689 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3690 else None 3691 ) 3692 where = self._parse_where() 3693 3694 on = self._parse_field() if self._match(TokenType.ON) else None 3695 3696 return self.expression( 3697 exp.IndexParameters, 3698 using=using, 3699 columns=columns, 3700 include=include, 3701 partition_by=partition_by, 3702 where=where, 3703 with_storage=with_storage, 3704 tablespace=tablespace, 3705 on=on, 3706 ) 3707 3708 def _parse_index( 3709 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3710 ) -> t.Optional[exp.Index]: 3711 if index or anonymous: 3712 unique = None 3713 primary = None 3714 amp = None 3715 3716 self._match(TokenType.ON) 3717 self._match(TokenType.TABLE) # hive 3718 table = self._parse_table_parts(schema=True) 3719 else: 3720 unique = self._match(TokenType.UNIQUE) 3721 primary = self._match_text_seq("PRIMARY") 3722 amp = self._match_text_seq("AMP") 3723 3724 if not self._match(TokenType.INDEX): 3725 return None 3726 3727 index = self._parse_id_var() 3728 table = None 3729 3730 params = self._parse_index_params() 3731 3732 return self.expression( 3733 exp.Index, 3734 this=index, 3735 table=table, 3736 unique=unique, 3737 primary=primary, 3738 amp=amp, 3739 params=params, 3740 ) 3741 3742 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3743 hints: t.List[exp.Expression] = [] 3744 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3745 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3746 hints.append( 3747 self.expression( 3748 exp.WithTableHint, 3749 expressions=self._parse_csv( 3750 lambda: self._parse_function() or self._parse_var(any_token=True) 3751 ), 3752 ) 3753 ) 3754 self._match_r_paren() 3755 else: 3756 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3757 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3758 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3759 3760 self._match_set((TokenType.INDEX, TokenType.KEY)) 3761 if self._match(TokenType.FOR): 3762 hint.set("target", self._advance_any() and self._prev.text.upper()) 3763 3764 hint.set("expressions", self._parse_wrapped_id_vars()) 3765 hints.append(hint) 3766 3767 return hints or None 3768 3769 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3770 return ( 3771 (not schema and self._parse_function(optional_parens=False)) 3772 or self._parse_id_var(any_token=False) 3773 or self._parse_string_as_identifier() 3774 or self._parse_placeholder() 3775 ) 3776 3777 def _parse_table_parts( 3778 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3779 ) -> exp.Table: 3780 catalog = None 3781 db = None 3782 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3783 3784 while self._match(TokenType.DOT): 3785 if catalog: 3786 # This allows nesting the table in arbitrarily many dot expressions if needed 3787 table = self.expression( 3788 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3789 ) 3790 else: 3791 catalog = db 3792 db = table 3793 # "" used for tsql FROM a..b case 3794 table = self._parse_table_part(schema=schema) or "" 3795 3796 if ( 3797 wildcard 3798 and self._is_connected() 3799 and (isinstance(table, exp.Identifier) or not table) 3800 and self._match(TokenType.STAR) 3801 ): 3802 if isinstance(table, exp.Identifier): 3803 table.args["this"] += "*" 3804 else: 3805 table = exp.Identifier(this="*") 3806 3807 # We bubble up comments from the Identifier to the Table 
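# e.g. in SELECT * FROM foo /* c */ the comment ends up on the Table node, not its Identifier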
3808 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3809 3810 if is_db_reference: 3811 catalog = db 3812 db = table 3813 table = None 3814 3815 if not table and not is_db_reference: 3816 self.raise_error(f"Expected table name but got {self._curr}") 3817 if not db and is_db_reference: 3818 self.raise_error(f"Expected database name but got {self._curr}") 3819 3820 table = self.expression( 3821 exp.Table, 3822 comments=comments, 3823 this=table, 3824 db=db, 3825 catalog=catalog, 3826 ) 3827 3828 changes = self._parse_changes() 3829 if changes: 3830 table.set("changes", changes) 3831 3832 at_before = self._parse_historical_data() 3833 if at_before: 3834 table.set("when", at_before) 3835 3836 pivots = self._parse_pivots() 3837 if pivots: 3838 table.set("pivots", pivots) 3839 3840 return table 3841 3842 def _parse_table( 3843 self, 3844 schema: bool = False, 3845 joins: bool = False, 3846 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3847 parse_bracket: bool = False, 3848 is_db_reference: bool = False, 3849 parse_partition: bool = False, 3850 ) -> t.Optional[exp.Expression]: 3851 lateral = self._parse_lateral() 3852 if lateral: 3853 return lateral 3854 3855 unnest = self._parse_unnest() 3856 if unnest: 3857 return unnest 3858 3859 values = self._parse_derived_table_values() 3860 if values: 3861 return values 3862 3863 subquery = self._parse_select(table=True) 3864 if subquery: 3865 if not subquery.args.get("pivots"): 3866 subquery.set("pivots", self._parse_pivots()) 3867 return subquery 3868 3869 bracket = parse_bracket and self._parse_bracket(None) 3870 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3871 3872 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3873 self._parse_table 3874 ) 3875 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3876 3877 only = self._match(TokenType.ONLY) 3878 3879 this = t.cast( 3880 exp.Expression, 3881 bracket 3882 or rows_from 3883 or self._parse_bracket( 3884 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3885 ), 3886 ) 3887 3888 if only: 3889 this.set("only", only) 3890 3891 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3892 self._match_text_seq("*") 3893 3894 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3895 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3896 this.set("partition", self._parse_partition()) 3897 3898 if schema: 3899 return self._parse_schema(this=this) 3900 3901 version = self._parse_version() 3902 3903 if version: 3904 this.set("version", version) 3905 3906 if self.dialect.ALIAS_POST_TABLESAMPLE: 3907 this.set("sample", self._parse_table_sample()) 3908 3909 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3910 if alias: 3911 this.set("alias", alias) 3912 3913 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3914 return self.expression( 3915 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3916 ) 3917 3918 this.set("hints", self._parse_table_hints()) 3919 3920 if not this.args.get("pivots"): 3921 this.set("pivots", self._parse_pivots()) 3922 3923 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3924 this.set("sample", self._parse_table_sample()) 3925 3926 if joins: 3927 for join in self._parse_joins(): 3928 this.append("joins", join) 3929 3930 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3931 
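# Postgres: FROM unnest(...) WITH ORDINALITY AS t(elem, n) appends a row-number column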
this.set("ordinality", True) 3932 this.set("alias", self._parse_table_alias()) 3933 3934 return this 3935 3936 def _parse_version(self) -> t.Optional[exp.Version]: 3937 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3938 this = "TIMESTAMP" 3939 elif self._match(TokenType.VERSION_SNAPSHOT): 3940 this = "VERSION" 3941 else: 3942 return None 3943 3944 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3945 kind = self._prev.text.upper() 3946 start = self._parse_bitwise() 3947 self._match_texts(("TO", "AND")) 3948 end = self._parse_bitwise() 3949 expression: t.Optional[exp.Expression] = self.expression( 3950 exp.Tuple, expressions=[start, end] 3951 ) 3952 elif self._match_text_seq("CONTAINED", "IN"): 3953 kind = "CONTAINED IN" 3954 expression = self.expression( 3955 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3956 ) 3957 elif self._match(TokenType.ALL): 3958 kind = "ALL" 3959 expression = None 3960 else: 3961 self._match_text_seq("AS", "OF") 3962 kind = "AS OF" 3963 expression = self._parse_type() 3964 3965 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3966 3967 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3968 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3969 index = self._index 3970 historical_data = None 3971 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3972 this = self._prev.text.upper() 3973 kind = ( 3974 self._match(TokenType.L_PAREN) 3975 and self._match_texts(self.HISTORICAL_DATA_KIND) 3976 and self._prev.text.upper() 3977 ) 3978 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3979 3980 if expression: 3981 self._match_r_paren() 3982 historical_data = self.expression( 3983 exp.HistoricalData, this=this, kind=kind, expression=expression 3984 ) 3985 else: 3986 self._retreat(index) 3987 3988 return historical_data 3989 3990 def _parse_changes(self) -> t.Optional[exp.Changes]: 3991 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3992 return None 3993 3994 information = self._parse_var(any_token=True) 3995 self._match_r_paren() 3996 3997 return self.expression( 3998 exp.Changes, 3999 information=information, 4000 at_before=self._parse_historical_data(), 4001 end=self._parse_historical_data(), 4002 ) 4003 4004 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4005 if not self._match(TokenType.UNNEST): 4006 return None 4007 4008 expressions = self._parse_wrapped_csv(self._parse_equality) 4009 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4010 4011 alias = self._parse_table_alias() if with_alias else None 4012 4013 if alias: 4014 if self.dialect.UNNEST_COLUMN_ONLY: 4015 if alias.args.get("columns"): 4016 self.raise_error("Unexpected extra column alias in unnest.") 4017 4018 alias.set("columns", [alias.this]) 4019 alias.set("this", None) 4020 4021 columns = alias.args.get("columns") or [] 4022 if offset and len(expressions) < len(columns): 4023 offset = columns.pop() 4024 4025 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4026 self._match(TokenType.ALIAS) 4027 offset = self._parse_id_var( 4028 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4029 ) or exp.to_identifier("offset") 4030 4031 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4032 4033 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4034 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4035 if not is_derived and not ( 4036 # ClickHouse's 
`FORMAT Values` is equivalent to `VALUES` 4037 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4038 ): 4039 return None 4040 4041 expressions = self._parse_csv(self._parse_value) 4042 alias = self._parse_table_alias() 4043 4044 if is_derived: 4045 self._match_r_paren() 4046 4047 return self.expression( 4048 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4049 ) 4050 4051 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4052 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4053 as_modifier and self._match_text_seq("USING", "SAMPLE") 4054 ): 4055 return None 4056 4057 bucket_numerator = None 4058 bucket_denominator = None 4059 bucket_field = None 4060 percent = None 4061 size = None 4062 seed = None 4063 4064 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4065 matched_l_paren = self._match(TokenType.L_PAREN) 4066 4067 if self.TABLESAMPLE_CSV: 4068 num = None 4069 expressions = self._parse_csv(self._parse_primary) 4070 else: 4071 expressions = None 4072 num = ( 4073 self._parse_factor() 4074 if self._match(TokenType.NUMBER, advance=False) 4075 else self._parse_primary() or self._parse_placeholder() 4076 ) 4077 4078 if self._match_text_seq("BUCKET"): 4079 bucket_numerator = self._parse_number() 4080 self._match_text_seq("OUT", "OF") 4081 bucket_denominator = self._parse_number() 4082 self._match(TokenType.ON) 4083 bucket_field = self._parse_field() 4084 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4085 percent = num 4086 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4087 size = num 4088 else: 4089 percent = num 4090 4091 if matched_l_paren: 4092 self._match_r_paren() 4093 4094 if self._match(TokenType.L_PAREN): 4095 method = self._parse_var(upper=True) 4096 seed = self._match(TokenType.COMMA) and self._parse_number() 4097 self._match_r_paren() 4098 elif self._match_texts(("SEED", "REPEATABLE")): 4099 seed = self._parse_wrapped(self._parse_number) 4100 4101 if not method and self.DEFAULT_SAMPLING_METHOD: 4102 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4103 4104 return self.expression( 4105 exp.TableSample, 4106 expressions=expressions, 4107 method=method, 4108 bucket_numerator=bucket_numerator, 4109 bucket_denominator=bucket_denominator, 4110 bucket_field=bucket_field, 4111 percent=percent, 4112 size=size, 4113 seed=seed, 4114 ) 4115 4116 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4117 return list(iter(self._parse_pivot, None)) or None 4118 4119 def _parse_joins(self) -> t.Iterator[exp.Join]: 4120 return iter(self._parse_join, None) 4121 4122 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4123 if not self._match(TokenType.INTO): 4124 return None 4125 4126 return self.expression( 4127 exp.UnpivotColumns, 4128 this=self._match_text_seq("NAME") and self._parse_column(), 4129 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4130 ) 4131 4132 # https://duckdb.org/docs/sql/statements/pivot 4133 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4134 def _parse_on() -> t.Optional[exp.Expression]: 4135 this = self._parse_bitwise() 4136 4137 if self._match(TokenType.IN): 4138 # PIVOT ... ON col IN (row_val1, row_val2) 4139 return self._parse_in(this) 4140 if self._match(TokenType.ALIAS, advance=False): 4141 # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4142 return self._parse_alias(this) 4143 4144 return this 4145 4146 this = self._parse_table() 4147 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4148 into = self._parse_unpivot_columns() 4149 using = self._match(TokenType.USING) and self._parse_csv( 4150 lambda: self._parse_alias(self._parse_function()) 4151 ) 4152 group = self._parse_group() 4153 4154 return self.expression( 4155 exp.Pivot, 4156 this=this, 4157 expressions=expressions, 4158 using=using, 4159 group=group, 4160 unpivot=is_unpivot, 4161 into=into, 4162 ) 4163 4164 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4165 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4166 this = self._parse_select_or_expression() 4167 4168 self._match(TokenType.ALIAS) 4169 alias = self._parse_bitwise() 4170 if alias: 4171 if isinstance(alias, exp.Column) and not alias.db: 4172 alias = alias.this 4173 return self.expression(exp.PivotAlias, this=this, alias=alias) 4174 4175 return this 4176 4177 value = self._parse_column() 4178 4179 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4180 self.raise_error("Expecting IN (") 4181 4182 if self._match(TokenType.ANY): 4183 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4184 else: 4185 exprs = self._parse_csv(_parse_aliased_expression) 4186 4187 self._match_r_paren() 4188 return self.expression(exp.In, this=value, expressions=exprs) 4189 4190 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4191 index = self._index 4192 include_nulls = None 4193 4194 if self._match(TokenType.PIVOT): 4195 unpivot = False 4196 elif self._match(TokenType.UNPIVOT): 4197 unpivot = True 4198 4199 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4200 if self._match_text_seq("INCLUDE", "NULLS"): 4201 include_nulls = True 4202 elif self._match_text_seq("EXCLUDE", "NULLS"): 4203 include_nulls = False 4204 else: 4205 return None 4206 4207 expressions = [] 4208 4209 if not self._match(TokenType.L_PAREN): 4210 self._retreat(index) 4211 return None 4212 4213 if unpivot: 4214 expressions = self._parse_csv(self._parse_column) 4215 else: 4216 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4217 4218 if not expressions: 4219 self.raise_error("Failed to parse PIVOT's aggregation list") 4220 4221 if not self._match(TokenType.FOR): 4222 self.raise_error("Expecting FOR") 4223 4224 field = self._parse_pivot_in() 4225 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4226 self._parse_bitwise 4227 ) 4228 4229 self._match_r_paren() 4230 4231 pivot = self.expression( 4232 exp.Pivot, 4233 expressions=expressions, 4234 field=field, 4235 unpivot=unpivot, 4236 include_nulls=include_nulls, 4237 default_on_null=default_on_null, 4238 ) 4239 4240 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4241 pivot.set("alias", self._parse_table_alias()) 4242 4243 if not unpivot: 4244 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4245 4246 columns: t.List[exp.Expression] = [] 4247 pivot_field_expressions = pivot.args["field"].expressions 4248 4249 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
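# Illustrative sketch, assuming the public parse_one/exp API: with an explicit
# IN list the loop below can infer the pivot's output columns, whereas the ANY
# variant cannot, e.g.
#
#     from sqlglot import exp, parse_one
#     sql = "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))"
#     pivot = parse_one(sql, read="snowflake").find(exp.Pivot)
#     # pivot.args["field"] is an exp.In over the two literals; with
#     # "... FOR k IN (ANY ORDER BY k)" it would wrap a single exp.PivotAny.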
4250 if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4251 for fld in pivot_field_expressions: 4252 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4253 for name in names: 4254 if self.PREFIXED_PIVOT_COLUMNS: 4255 name = f"{name}_{field_name}" if name else field_name 4256 else: 4257 name = f"{field_name}_{name}" if name else field_name 4258 4259 columns.append(exp.to_identifier(name)) 4260 4261 pivot.set("columns", columns) 4262 4263 return pivot 4264 4265 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4266 return [agg.alias for agg in aggregations] 4267 4268 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4269 if not skip_where_token and not self._match(TokenType.PREWHERE): 4270 return None 4271 4272 return self.expression( 4273 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4274 ) 4275 4276 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4277 if not skip_where_token and not self._match(TokenType.WHERE): 4278 return None 4279 4280 return self.expression( 4281 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4282 ) 4283 4284 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4285 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4286 return None 4287 4288 elements: t.Dict[str, t.Any] = defaultdict(list) 4289 4290 if self._match(TokenType.ALL): 4291 elements["all"] = True 4292 elif self._match(TokenType.DISTINCT): 4293 elements["all"] = False 4294 4295 while True: 4296 index = self._index 4297 4298 elements["expressions"].extend( 4299 self._parse_csv( 4300 lambda: None 4301 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4302 else self._parse_assignment() 4303 ) 4304 ) 4305 4306 before_with_index = self._index 4307 with_prefix = self._match(TokenType.WITH) 4308 4309 if self._match(TokenType.ROLLUP): 4310 elements["rollup"].append( 4311 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4312 ) 4313 elif self._match(TokenType.CUBE): 4314 elements["cube"].append( 4315 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4316 ) 4317 elif self._match(TokenType.GROUPING_SETS): 4318 elements["grouping_sets"].append( 4319 self.expression( 4320 exp.GroupingSets, 4321 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4322 ) 4323 ) 4324 elif self._match_text_seq("TOTALS"): 4325 elements["totals"] = True # type: ignore 4326 4327 if before_with_index <= self._index <= before_with_index + 1: 4328 self._retreat(before_with_index) 4329 break 4330 4331 if index == self._index: 4332 break 4333 4334 return self.expression(exp.Group, **elements) # type: ignore 4335 4336 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4337 return self.expression( 4338 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4339 ) 4340 4341 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4342 if self._match(TokenType.L_PAREN): 4343 grouping_set = self._parse_csv(self._parse_column) 4344 self._match_r_paren() 4345 return self.expression(exp.Tuple, expressions=grouping_set) 4346 4347 return self._parse_column() 4348 4349 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4350 if not skip_having_token and not self._match(TokenType.HAVING): 4351 return None 4352 return self.expression(exp.Having, 
this=self._parse_assignment()) 4353 4354 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4355 if not self._match(TokenType.QUALIFY): 4356 return None 4357 return self.expression(exp.Qualify, this=self._parse_assignment()) 4358 4359 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4360 if skip_start_token: 4361 start = None 4362 elif self._match(TokenType.START_WITH): 4363 start = self._parse_assignment() 4364 else: 4365 return None 4366 4367 self._match(TokenType.CONNECT_BY) 4368 nocycle = self._match_text_seq("NOCYCLE") 4369 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4370 exp.Prior, this=self._parse_bitwise() 4371 ) 4372 connect = self._parse_assignment() 4373 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4374 4375 if not start and self._match(TokenType.START_WITH): 4376 start = self._parse_assignment() 4377 4378 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4379 4380 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4381 this = self._parse_id_var(any_token=True) 4382 if self._match(TokenType.ALIAS): 4383 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4384 return this 4385 4386 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4387 if self._match_text_seq("INTERPOLATE"): 4388 return self._parse_wrapped_csv(self._parse_name_as_expression) 4389 return None 4390 4391 def _parse_order( 4392 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4393 ) -> t.Optional[exp.Expression]: 4394 siblings = None 4395 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4396 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4397 return this 4398 4399 siblings = True 4400 4401 return self.expression( 4402 exp.Order, 4403 this=this, 4404 expressions=self._parse_csv(self._parse_ordered), 4405 siblings=siblings, 4406 ) 4407 4408 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4409 if not self._match(token): 4410 return None 4411 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4412 4413 def _parse_ordered( 4414 self, parse_method: t.Optional[t.Callable] = None 4415 ) -> t.Optional[exp.Ordered]: 4416 this = parse_method() if parse_method else self._parse_assignment() 4417 if not this: 4418 return None 4419 4420 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4421 this = exp.var("ALL") 4422 4423 asc = self._match(TokenType.ASC) 4424 desc = self._match(TokenType.DESC) or (asc and False) 4425 4426 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4427 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4428 4429 nulls_first = is_nulls_first or False 4430 explicitly_null_ordered = is_nulls_first or is_nulls_last 4431 4432 if ( 4433 not explicitly_null_ordered 4434 and ( 4435 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4436 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4437 ) 4438 and self.dialect.NULL_ORDERING != "nulls_are_last" 4439 ): 4440 nulls_first = True 4441 4442 if self._match_text_seq("WITH", "FILL"): 4443 with_fill = self.expression( 4444 exp.WithFill, 4445 **{ # type: ignore 4446 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4447 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4448 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4449 "interpolate": self._parse_interpolate(), 4450 }, 4451 ) 4452 else: 4453 with_fill = None 4454 
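# Minimal sketch of the null-ordering inference above, assuming a dialect that
# keeps the default NULL_ORDERING of "nulls_are_small": an ascending key with
# no explicit NULLS clause comes back flagged nulls_first, e.g.
#
#     from sqlglot import exp, parse_one
#     ordered = parse_one("SELECT x FROM t ORDER BY x").find(exp.Ordered)
#     # ordered.args.get("nulls_first") -> True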
4455 return self.expression( 4456 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4457 ) 4458 4459 def _parse_limit_options(self) -> exp.LimitOptions: 4460 percent = self._match(TokenType.PERCENT) 4461 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4462 self._match_text_seq("ONLY") 4463 with_ties = self._match_text_seq("WITH", "TIES") 4464 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4465 4466 def _parse_limit( 4467 self, 4468 this: t.Optional[exp.Expression] = None, 4469 top: bool = False, 4470 skip_limit_token: bool = False, 4471 ) -> t.Optional[exp.Expression]: 4472 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4473 comments = self._prev_comments 4474 if top: 4475 limit_paren = self._match(TokenType.L_PAREN) 4476 expression = self._parse_term() if limit_paren else self._parse_number() 4477 4478 if limit_paren: 4479 self._match_r_paren() 4480 4481 limit_options = self._parse_limit_options() 4482 else: 4483 limit_options = None 4484 expression = self._parse_term() 4485 4486 if self._match(TokenType.COMMA): 4487 offset = expression 4488 expression = self._parse_term() 4489 else: 4490 offset = None 4491 4492 limit_exp = self.expression( 4493 exp.Limit, 4494 this=this, 4495 expression=expression, 4496 offset=offset, 4497 comments=comments, 4498 limit_options=limit_options, 4499 expressions=self._parse_limit_by(), 4500 ) 4501 4502 return limit_exp 4503 4504 if self._match(TokenType.FETCH): 4505 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4506 direction = self._prev.text.upper() if direction else "FIRST" 4507 4508 count = self._parse_field(tokens=self.FETCH_TOKENS) 4509 4510 return self.expression( 4511 exp.Fetch, 4512 direction=direction, 4513 count=count, 4514 limit_options=self._parse_limit_options(), 4515 ) 4516 4517 return this 4518 4519 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4520 if not self._match(TokenType.OFFSET): 4521 return this 4522 4523 count = self._parse_term() 4524 self._match_set((TokenType.ROW, TokenType.ROWS)) 4525 4526 return self.expression( 4527 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4528 ) 4529 4530 def _can_parse_limit_or_offset(self) -> bool: 4531 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4532 return False 4533 4534 index = self._index 4535 result = bool( 4536 self._try_parse(self._parse_limit, retreat=True) 4537 or self._try_parse(self._parse_offset, retreat=True) 4538 ) 4539 self._retreat(index) 4540 return result 4541 4542 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4543 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4544 4545 def _parse_locks(self) -> t.List[exp.Lock]: 4546 locks = [] 4547 while True: 4548 if self._match_text_seq("FOR", "UPDATE"): 4549 update = True 4550 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4551 "LOCK", "IN", "SHARE", "MODE" 4552 ): 4553 update = False 4554 else: 4555 break 4556 4557 expressions = None 4558 if self._match_text_seq("OF"): 4559 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4560 4561 wait: t.Optional[bool | exp.Expression] = None 4562 if self._match_text_seq("NOWAIT"): 4563 wait = True 4564 elif self._match_text_seq("WAIT"): 4565 wait = self._parse_primary() 4566 elif self._match_text_seq("SKIP", "LOCKED"): 4567 wait = False 4568 4569 locks.append( 4570 self.expression(exp.Lock, 
update=update, expressions=expressions, wait=wait) 4571 ) 4572 4573 return locks 4574 4575 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4576 while this and self._match_set(self.SET_OPERATIONS): 4577 token_type = self._prev.token_type 4578 4579 if token_type == TokenType.UNION: 4580 operation: t.Type[exp.SetOperation] = exp.Union 4581 elif token_type == TokenType.EXCEPT: 4582 operation = exp.Except 4583 else: 4584 operation = exp.Intersect 4585 4586 comments = self._prev.comments 4587 4588 if self._match(TokenType.DISTINCT): 4589 distinct: t.Optional[bool] = True 4590 elif self._match(TokenType.ALL): 4591 distinct = False 4592 else: 4593 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4594 if distinct is None: 4595 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4596 4597 by_name = self._match_text_seq("BY", "NAME") 4598 expression = self._parse_select(nested=True, parse_set_operation=False) 4599 4600 this = self.expression( 4601 operation, 4602 comments=comments, 4603 this=this, 4604 distinct=distinct, 4605 by_name=by_name, 4606 expression=expression, 4607 ) 4608 4609 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4610 expression = this.expression 4611 4612 if expression: 4613 for arg in self.SET_OP_MODIFIERS: 4614 expr = expression.args.get(arg) 4615 if expr: 4616 this.set(arg, expr.pop()) 4617 4618 return this 4619 4620 def _parse_expression(self) -> t.Optional[exp.Expression]: 4621 return self._parse_alias(self._parse_assignment()) 4622 4623 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4624 this = self._parse_disjunction() 4625 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4626 # This allows us to parse <non-identifier token> := <expr> 4627 this = exp.column( 4628 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4629 ) 4630 4631 while self._match_set(self.ASSIGNMENT): 4632 if isinstance(this, exp.Column) and len(this.parts) == 1: 4633 this = this.this 4634 4635 this = self.expression( 4636 self.ASSIGNMENT[self._prev.token_type], 4637 this=this, 4638 comments=self._prev_comments, 4639 expression=self._parse_assignment(), 4640 ) 4641 4642 return this 4643 4644 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4645 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4646 4647 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4648 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4649 4650 def _parse_equality(self) -> t.Optional[exp.Expression]: 4651 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4652 4653 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4654 return self._parse_tokens(self._parse_range, self.COMPARISON) 4655 4656 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4657 this = this or self._parse_bitwise() 4658 negate = self._match(TokenType.NOT) 4659 4660 if self._match_set(self.RANGE_PARSERS): 4661 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4662 if not expression: 4663 return this 4664 4665 this = expression 4666 elif self._match(TokenType.ISNULL): 4667 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4668 4669 # Postgres supports ISNULL and NOTNULL for conditions. 
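# Illustrative sketch of these shorthands, assuming the public parse_one API:
# "x ISNULL" is parsed as x IS NULL, and "x NOTNULL" (handled just below) as
# NOT (x IS NULL), e.g.
#
#     from sqlglot import parse_one
#     parse_one("SELECT x NOTNULL", read="postgres").sql()
#     # 'SELECT NOT x IS NULL'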
4670 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4671 if self._match(TokenType.NOTNULL): 4672 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4673 this = self.expression(exp.Not, this=this) 4674 4675 if negate: 4676 this = self._negate_range(this) 4677 4678 if self._match(TokenType.IS): 4679 this = self._parse_is(this) 4680 4681 return this 4682 4683 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4684 if not this: 4685 return this 4686 4687 return self.expression(exp.Not, this=this) 4688 4689 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4690 index = self._index - 1 4691 negate = self._match(TokenType.NOT) 4692 4693 if self._match_text_seq("DISTINCT", "FROM"): 4694 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4695 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4696 4697 if self._match(TokenType.JSON): 4698 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4699 4700 if self._match_text_seq("WITH"): 4701 _with = True 4702 elif self._match_text_seq("WITHOUT"): 4703 _with = False 4704 else: 4705 _with = None 4706 4707 unique = self._match(TokenType.UNIQUE) 4708 self._match_text_seq("KEYS") 4709 expression: t.Optional[exp.Expression] = self.expression( 4710 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4711 ) 4712 else: 4713 expression = self._parse_primary() or self._parse_null() 4714 if not expression: 4715 self._retreat(index) 4716 return None 4717 4718 this = self.expression(exp.Is, this=this, expression=expression) 4719 return self.expression(exp.Not, this=this) if negate else this 4720 4721 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4722 unnest = self._parse_unnest(with_alias=False) 4723 if unnest: 4724 this = self.expression(exp.In, this=this, unnest=unnest) 4725 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4726 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4727 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4728 4729 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4730 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4731 else: 4732 this = self.expression(exp.In, this=this, expressions=expressions) 4733 4734 if matched_l_paren: 4735 self._match_r_paren(this) 4736 elif not self._match(TokenType.R_BRACKET, expression=this): 4737 self.raise_error("Expecting ]") 4738 else: 4739 this = self.expression(exp.In, this=this, field=self._parse_column()) 4740 4741 return this 4742 4743 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4744 low = self._parse_bitwise() 4745 self._match(TokenType.AND) 4746 high = self._parse_bitwise() 4747 return self.expression(exp.Between, this=this, low=low, high=high) 4748 4749 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4750 if not self._match(TokenType.ESCAPE): 4751 return this 4752 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4753 4754 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4755 index = self._index 4756 4757 if not self._match(TokenType.INTERVAL) and match_interval: 4758 return None 4759 4760 if self._match(TokenType.STRING, advance=False): 4761 this = self._parse_primary() 4762 else: 4763 this = self._parse_term() 4764 4765 if not 
this or ( 4766 isinstance(this, exp.Column) 4767 and not this.table 4768 and not this.this.quoted 4769 and this.name.upper() == "IS" 4770 ): 4771 self._retreat(index) 4772 return None 4773 4774 unit = self._parse_function() or ( 4775 not self._match(TokenType.ALIAS, advance=False) 4776 and self._parse_var(any_token=True, upper=True) 4777 ) 4778 4779 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4780 # each INTERVAL expression into this canonical form so it's easy to transpile 4781 if this and this.is_number: 4782 this = exp.Literal.string(this.to_py()) 4783 elif this and this.is_string: 4784 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4785 if parts and unit: 4786 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4787 unit = None 4788 self._retreat(self._index - 1) 4789 4790 if len(parts) == 1: 4791 this = exp.Literal.string(parts[0][0]) 4792 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4793 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4794 unit = self.expression( 4795 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4796 ) 4797 4798 interval = self.expression(exp.Interval, this=this, unit=unit) 4799 4800 index = self._index 4801 self._match(TokenType.PLUS) 4802 4803 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4804 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4805 return self.expression( 4806 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4807 ) 4808 4809 self._retreat(index) 4810 return interval 4811 4812 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4813 this = self._parse_term() 4814 4815 while True: 4816 if self._match_set(self.BITWISE): 4817 this = self.expression( 4818 self.BITWISE[self._prev.token_type], 4819 this=this, 4820 expression=self._parse_term(), 4821 ) 4822 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4823 this = self.expression( 4824 exp.DPipe, 4825 this=this, 4826 expression=self._parse_term(), 4827 safe=not self.dialect.STRICT_STRING_CONCAT, 4828 ) 4829 elif self._match(TokenType.DQMARK): 4830 this = self.expression( 4831 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4832 ) 4833 elif self._match_pair(TokenType.LT, TokenType.LT): 4834 this = self.expression( 4835 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4836 ) 4837 elif self._match_pair(TokenType.GT, TokenType.GT): 4838 this = self.expression( 4839 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4840 ) 4841 else: 4842 break 4843 4844 return this 4845 4846 def _parse_term(self) -> t.Optional[exp.Expression]: 4847 this = self._parse_factor() 4848 4849 while self._match_set(self.TERM): 4850 klass = self.TERM[self._prev.token_type] 4851 comments = self._prev_comments 4852 expression = self._parse_factor() 4853 4854 this = self.expression(klass, this=this, comments=comments, expression=expression) 4855 4856 if isinstance(this, exp.Collate): 4857 expr = this.expression 4858 4859 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4860 # fallback to Identifier / Var 4861 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4862 ident = expr.this 4863 if isinstance(ident, exp.Identifier): 4864 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4865 4866 return this 4867 4868 def _parse_factor(self) -> t.Optional[exp.Expression]: 4869 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary 4870 this = parse_method() 4871 4872 while self._match_set(self.FACTOR): 4873 klass = self.FACTOR[self._prev.token_type] 4874 comments = self._prev_comments 4875 expression = parse_method() 4876 4877 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4878 self._retreat(self._index - 1) 4879 return this 4880 4881 this = self.expression(klass, this=this, comments=comments, expression=expression) 4882 4883 if isinstance(this, exp.Div): 4884 this.args["typed"] = self.dialect.TYPED_DIVISION 4885 this.args["safe"] = self.dialect.SAFE_DIVISION 4886 4887 return this 4888 4889 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4890 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4891 4892 def _parse_unary(self) -> t.Optional[exp.Expression]: 4893 if self._match_set(self.UNARY_PARSERS): 4894 return self.UNARY_PARSERS[self._prev.token_type](self) 4895 return self._parse_at_time_zone(self._parse_type()) 4896 4897 def _parse_type( 4898 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4899 ) -> t.Optional[exp.Expression]: 4900 interval = parse_interval and self._parse_interval() 4901 if interval: 4902 return interval 4903 4904 index = self._index 4905 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4906 4907 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4908 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4909 if isinstance(data_type, exp.Cast): 4910 # This constructor can contain ops directly after it, for instance struct unnesting: 4911 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4912 return self._parse_column_ops(data_type) 4913 4914 if data_type: 4915 index2 = self._index 4916 this = self._parse_primary() 4917 4918 if isinstance(this, exp.Literal): 4919 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4920 if parser: 4921 return parser(self, this, data_type) 4922 4923 return self.expression(exp.Cast, this=this, to=data_type) 4924 4925 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4926 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4927 # 4928 # If the index difference here is greater than 1, that means the parser itself must have 4929 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4930 # 4931 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4932 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4933 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4934 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4935 # 4936 # In these cases, we don't really want to return the converted type, but instead retreat 4937 # and try to parse a Column or Identifier in the section below.
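# A concrete sketch of the two cases, following the example above: for the
# literal input DECIMAL(38, 0) the parser consumed the wrapped precision and
# scale itself, so index2 - index > 1 and the parsed type is kept; for a bare
# DECIMAL that a TYPE_CONVERTERS callable expanded to DECIMAL(38, 0), only the
# type keyword was consumed (index2 - index == 1), so we fall through to the
# retreat and re-parse that token as a column or identifier.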
4938 if data_type.expressions and index2 - index > 1: 4939 self._retreat(index2) 4940 return self._parse_column_ops(data_type) 4941 4942 self._retreat(index) 4943 4944 if fallback_to_identifier: 4945 return self._parse_id_var() 4946 4947 this = self._parse_column() 4948 return this and self._parse_column_ops(this) 4949 4950 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4951 this = self._parse_type() 4952 if not this: 4953 return None 4954 4955 if isinstance(this, exp.Column) and not this.table: 4956 this = exp.var(this.name.upper()) 4957 4958 return self.expression( 4959 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4960 ) 4961 4962 def _parse_types( 4963 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4964 ) -> t.Optional[exp.Expression]: 4965 index = self._index 4966 4967 this: t.Optional[exp.Expression] = None 4968 prefix = self._match_text_seq("SYSUDTLIB", ".") 4969 4970 if not self._match_set(self.TYPE_TOKENS): 4971 identifier = allow_identifiers and self._parse_id_var( 4972 any_token=False, tokens=(TokenType.VAR,) 4973 ) 4974 if isinstance(identifier, exp.Identifier): 4975 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4976 4977 if len(tokens) != 1: 4978 self.raise_error("Unexpected identifier", self._prev) 4979 4980 if tokens[0].token_type in self.TYPE_TOKENS: 4981 self._prev = tokens[0] 4982 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4983 type_name = identifier.name 4984 4985 while self._match(TokenType.DOT): 4986 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4987 4988 this = exp.DataType.build(type_name, udt=True) 4989 else: 4990 self._retreat(self._index - 1) 4991 return None 4992 else: 4993 return None 4994 4995 type_token = self._prev.token_type 4996 4997 if type_token == TokenType.PSEUDO_TYPE: 4998 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4999 5000 if type_token == TokenType.OBJECT_IDENTIFIER: 5001 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5002 5003 # https://materialize.com/docs/sql/types/map/ 5004 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5005 key_type = self._parse_types( 5006 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5007 ) 5008 if not self._match(TokenType.FARROW): 5009 self._retreat(index) 5010 return None 5011 5012 value_type = self._parse_types( 5013 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5014 ) 5015 if not self._match(TokenType.R_BRACKET): 5016 self._retreat(index) 5017 return None 5018 5019 return exp.DataType( 5020 this=exp.DataType.Type.MAP, 5021 expressions=[key_type, value_type], 5022 nested=True, 5023 prefix=prefix, 5024 ) 5025 5026 nested = type_token in self.NESTED_TYPE_TOKENS 5027 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5028 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5029 expressions = None 5030 maybe_func = False 5031 5032 if self._match(TokenType.L_PAREN): 5033 if is_struct: 5034 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5035 elif nested: 5036 expressions = self._parse_csv( 5037 lambda: self._parse_types( 5038 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5039 ) 5040 ) 5041 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5042 this = expressions[0] 5043 this.set("nullable", True) 5044 self._match_r_paren() 5045 return this 5046 elif type_token in self.ENUM_TYPE_TOKENS: 5047 
expressions = self._parse_csv(self._parse_equality) 5048 elif is_aggregate: 5049 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5050 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5051 ) 5052 if not func_or_ident or not self._match(TokenType.COMMA): 5053 return None 5054 expressions = self._parse_csv( 5055 lambda: self._parse_types( 5056 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5057 ) 5058 ) 5059 expressions.insert(0, func_or_ident) 5060 else: 5061 expressions = self._parse_csv(self._parse_type_size) 5062 5063 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5064 if type_token == TokenType.VECTOR and len(expressions) == 2: 5065 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5066 5067 if not expressions or not self._match(TokenType.R_PAREN): 5068 self._retreat(index) 5069 return None 5070 5071 maybe_func = True 5072 5073 values: t.Optional[t.List[exp.Expression]] = None 5074 5075 if nested and self._match(TokenType.LT): 5076 if is_struct: 5077 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5078 else: 5079 expressions = self._parse_csv( 5080 lambda: self._parse_types( 5081 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5082 ) 5083 ) 5084 5085 if not self._match(TokenType.GT): 5086 self.raise_error("Expecting >") 5087 5088 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5089 values = self._parse_csv(self._parse_assignment) 5090 if not values and is_struct: 5091 values = None 5092 self._retreat(self._index - 1) 5093 else: 5094 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5095 5096 if type_token in self.TIMESTAMPS: 5097 if self._match_text_seq("WITH", "TIME", "ZONE"): 5098 maybe_func = False 5099 tz_type = ( 5100 exp.DataType.Type.TIMETZ 5101 if type_token in self.TIMES 5102 else exp.DataType.Type.TIMESTAMPTZ 5103 ) 5104 this = exp.DataType(this=tz_type, expressions=expressions) 5105 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5106 maybe_func = False 5107 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5108 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5109 maybe_func = False 5110 elif type_token == TokenType.INTERVAL: 5111 unit = self._parse_var(upper=True) 5112 if unit: 5113 if self._match_text_seq("TO"): 5114 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5115 5116 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5117 else: 5118 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5119 5120 if maybe_func and check_func: 5121 index2 = self._index 5122 peek = self._parse_string() 5123 5124 if not peek: 5125 self._retreat(index) 5126 return None 5127 5128 self._retreat(index2) 5129 5130 if not this: 5131 if self._match_text_seq("UNSIGNED"): 5132 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5133 if not unsigned_type_token: 5134 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5135 5136 type_token = unsigned_type_token or type_token 5137 5138 this = exp.DataType( 5139 this=exp.DataType.Type[type_token.value], 5140 expressions=expressions, 5141 nested=nested, 5142 prefix=prefix, 5143 ) 5144 5145 # Empty arrays/structs are allowed 5146 if values is not None: 5147 cls = exp.Struct if is_struct else exp.Array 5148 this = exp.cast(cls(expressions=values), this, copy=False) 5149 5150 elif expressions: 5151 
this.set("expressions", expressions) 5152 5153 # https://materialize.com/docs/sql/types/list/#type-name 5154 while self._match(TokenType.LIST): 5155 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5156 5157 index = self._index 5158 5159 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5160 matched_array = self._match(TokenType.ARRAY) 5161 5162 while self._curr: 5163 datatype_token = self._prev.token_type 5164 matched_l_bracket = self._match(TokenType.L_BRACKET) 5165 5166 if (not matched_l_bracket and not matched_array) or ( 5167 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5168 ): 5169 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5170 # not to be confused with the fixed size array parsing 5171 break 5172 5173 matched_array = False 5174 values = self._parse_csv(self._parse_assignment) or None 5175 if ( 5176 values 5177 and not schema 5178 and ( 5179 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5180 ) 5181 ): 5182 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5183 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5184 self._retreat(index) 5185 break 5186 5187 this = exp.DataType( 5188 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5189 ) 5190 self._match(TokenType.R_BRACKET) 5191 5192 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5193 converter = self.TYPE_CONVERTERS.get(this.this) 5194 if converter: 5195 this = converter(t.cast(exp.DataType, this)) 5196 5197 return this 5198 5199 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5200 index = self._index 5201 5202 if ( 5203 self._curr 5204 and self._next 5205 and self._curr.token_type in self.TYPE_TOKENS 5206 and self._next.token_type in self.TYPE_TOKENS 5207 ): 5208 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5209 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5210 this = self._parse_id_var() 5211 else: 5212 this = ( 5213 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5214 or self._parse_id_var() 5215 ) 5216 5217 self._match(TokenType.COLON) 5218 5219 if ( 5220 type_required 5221 and not isinstance(this, exp.DataType) 5222 and not self._match_set(self.TYPE_TOKENS, advance=False) 5223 ): 5224 self._retreat(index) 5225 return self._parse_types() 5226 5227 return self._parse_column_def(this) 5228 5229 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5230 if not self._match_text_seq("AT", "TIME", "ZONE"): 5231 return this 5232 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5233 5234 def _parse_column(self) -> t.Optional[exp.Expression]: 5235 this = self._parse_column_reference() 5236 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5237 5238 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5239 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5240 5241 return column 5242 5243 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5244 this = self._parse_field() 5245 if ( 5246 not this 5247 and self._match(TokenType.VALUES, advance=False) 5248 and self.VALUES_FOLLOWED_BY_PAREN 5249 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5250 ): 5251 this = self._parse_id_var() 5252 5253 if isinstance(this, exp.Identifier): 5254 # We bubble up comments from the Identifier to the Column 5255 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5256 5257 return this 5258 5259 def _parse_colon_as_variant_extract( 5260 self, this: t.Optional[exp.Expression] 5261 ) -> t.Optional[exp.Expression]: 5262 casts = [] 5263 json_path = [] 5264 escape = None 5265 5266 while self._match(TokenType.COLON): 5267 start_index = self._index 5268 5269 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5270 path = self._parse_column_ops( 5271 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5272 ) 5273 5274 # The cast :: operator has a lower precedence than the extraction operator :, so 5275 # we rearrange the AST appropriately to avoid casting the JSON path 5276 while isinstance(path, exp.Cast): 5277 casts.append(path.to) 5278 path = path.this 5279 5280 if casts: 5281 dcolon_offset = next( 5282 i 5283 for i, t in enumerate(self._tokens[start_index:]) 5284 if t.token_type == TokenType.DCOLON 5285 ) 5286 end_token = self._tokens[start_index + dcolon_offset - 1] 5287 else: 5288 end_token = self._prev 5289 5290 if path: 5291 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5292 # it'll roundtrip to a string literal in GET_PATH 5293 if isinstance(path, exp.Identifier) and path.quoted: 5294 escape = True 5295 5296 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5297 5298 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5299 # Databricks transforms it back to the colon/dot notation 5300 if json_path: 5301 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5302 5303 if json_path_expr: 5304 json_path_expr.set("escape", escape) 5305 5306 this = self.expression( 5307 exp.JSONExtract, 5308 this=this, 5309 expression=json_path_expr, 5310 variant_extract=True, 5311 ) 5312 5313 while casts: 5314 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5315 5316 return this 5317 5318 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5319 return self._parse_types() 5320 5321 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5322 this = self._parse_bracket(this) 5323 5324 while self._match_set(self.COLUMN_OPERATORS): 5325 op_token = self._prev.token_type 5326 op = self.COLUMN_OPERATORS.get(op_token) 5327 5328 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5329 field = self._parse_dcolon() 5330 if not field: 5331 self.raise_error("Expected type") 5332 elif op and self._curr: 5333 field = self._parse_column_reference() or self._parse_bracket() 5334 else: 5335 field = self._parse_field(any_token=True, anonymous_func=True) 5336 5337 if isinstance(field, (exp.Func, exp.Window)) and this: 5338 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5339 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5340 this = exp.replace_tree( 5341 this, 5342 lambda n: ( 5343 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5344 if n.table 5345 else n.this 5346 ) 5347 if isinstance(n, exp.Column) 5348 else n, 5349 ) 5350 5351 if op: 5352 this = op(self, this, field) 5353 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5354 this = self.expression( 5355 exp.Column, 5356 comments=this.comments, 5357 this=field, 5358 table=this.this, 5359 db=this.args.get("table"), 5360 catalog=this.args.get("db"), 5361 ) 5362 elif isinstance(field, exp.Window): 5363 # Move the exp.Dot's to the window's function 5364 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5365 field.set("this", window_func) 5366 this = field 5367 else: 5368 this = self.expression(exp.Dot, this=this, expression=field) 5369 5370 if field and field.comments: 5371 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5372 5373 this = self._parse_bracket(this) 5374 5375 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5376 5377 def _parse_primary(self) -> t.Optional[exp.Expression]: 5378 if self._match_set(self.PRIMARY_PARSERS): 5379 token_type = self._prev.token_type 5380 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5381 5382 if token_type == TokenType.STRING: 5383 expressions = [primary] 5384 while self._match(TokenType.STRING): 5385 expressions.append(exp.Literal.string(self._prev.text)) 5386 5387 if len(expressions) > 1: 5388 return self.expression(exp.Concat, expressions=expressions) 5389 5390 return primary 5391 5392 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5393 return exp.Literal.number(f"0.{self._prev.text}") 
5394 5395 if self._match(TokenType.L_PAREN): 5396 comments = self._prev_comments 5397 query = self._parse_select() 5398 5399 if query: 5400 expressions = [query] 5401 else: 5402 expressions = self._parse_expressions() 5403 5404 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5405 5406 if not this and self._match(TokenType.R_PAREN, advance=False): 5407 this = self.expression(exp.Tuple) 5408 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5409 this = self._parse_subquery(this=this, parse_alias=False) 5410 elif isinstance(this, exp.Subquery): 5411 this = self._parse_subquery( 5412 this=self._parse_set_operations(this), parse_alias=False 5413 ) 5414 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5415 this = self.expression(exp.Tuple, expressions=expressions) 5416 else: 5417 this = self.expression(exp.Paren, this=this) 5418 5419 if this: 5420 this.add_comments(comments) 5421 5422 self._match_r_paren(expression=this) 5423 return this 5424 5425 return None 5426 5427 def _parse_field( 5428 self, 5429 any_token: bool = False, 5430 tokens: t.Optional[t.Collection[TokenType]] = None, 5431 anonymous_func: bool = False, 5432 ) -> t.Optional[exp.Expression]: 5433 if anonymous_func: 5434 field = ( 5435 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5436 or self._parse_primary() 5437 ) 5438 else: 5439 field = self._parse_primary() or self._parse_function( 5440 anonymous=anonymous_func, any_token=any_token 5441 ) 5442 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5443 5444 def _parse_function( 5445 self, 5446 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5447 anonymous: bool = False, 5448 optional_parens: bool = True, 5449 any_token: bool = False, 5450 ) -> t.Optional[exp.Expression]: 5451 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5452 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5453 fn_syntax = False 5454 if ( 5455 self._match(TokenType.L_BRACE, advance=False) 5456 and self._next 5457 and self._next.text.upper() == "FN" 5458 ): 5459 self._advance(2) 5460 fn_syntax = True 5461 5462 func = self._parse_function_call( 5463 functions=functions, 5464 anonymous=anonymous, 5465 optional_parens=optional_parens, 5466 any_token=any_token, 5467 ) 5468 5469 if fn_syntax: 5470 self._match(TokenType.R_BRACE) 5471 5472 return func 5473 5474 def _parse_function_call( 5475 self, 5476 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5477 anonymous: bool = False, 5478 optional_parens: bool = True, 5479 any_token: bool = False, 5480 ) -> t.Optional[exp.Expression]: 5481 if not self._curr: 5482 return None 5483 5484 comments = self._curr.comments 5485 token_type = self._curr.token_type 5486 this = self._curr.text 5487 upper = this.upper() 5488 5489 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5490 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5491 self._advance() 5492 return self._parse_window(parser(self)) 5493 5494 if not self._next or self._next.token_type != TokenType.L_PAREN: 5495 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5496 self._advance() 5497 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5498 5499 return None 5500 5501 if any_token: 5502 if token_type in self.RESERVED_TOKENS: 5503 return None 5504 elif token_type not in self.FUNC_TOKENS: 5505 return None 5506 5507 self._advance(2) 5508 5509 parser = self.FUNCTION_PARSERS.get(upper) 5510 if parser and not anonymous: 5511 this = 
parser(self) 5512 else: 5513 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5514 5515 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5516 this = self.expression( 5517 subquery_predicate, comments=comments, this=self._parse_select() 5518 ) 5519 self._match_r_paren() 5520 return this 5521 5522 if functions is None: 5523 functions = self.FUNCTIONS 5524 5525 function = functions.get(upper) 5526 known_function = function and not anonymous 5527 5528 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5529 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5530 5531 post_func_comments = self._curr and self._curr.comments 5532 if known_function and post_func_comments: 5533 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5534 # call we'll construct it as exp.Anonymous, even if it's "known" 5535 if any( 5536 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5537 for comment in post_func_comments 5538 ): 5539 known_function = False 5540 5541 if alias and known_function: 5542 args = self._kv_to_prop_eq(args) 5543 5544 if known_function: 5545 func_builder = t.cast(t.Callable, function) 5546 5547 if "dialect" in func_builder.__code__.co_varnames: 5548 func = func_builder(args, dialect=self.dialect) 5549 else: 5550 func = func_builder(args) 5551 5552 func = self.validate_expression(func, args) 5553 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5554 func.meta["name"] = this 5555 5556 this = func 5557 else: 5558 if token_type == TokenType.IDENTIFIER: 5559 this = exp.Identifier(this=this, quoted=True) 5560 this = self.expression(exp.Anonymous, this=this, expressions=args) 5561 5562 if isinstance(this, exp.Expression): 5563 this.add_comments(comments) 5564 5565 self._match_r_paren(this) 5566 return self._parse_window(this) 5567 5568 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5569 return expression 5570 5571 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5572 transformed = [] 5573 5574 for index, e in enumerate(expressions): 5575 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5576 if isinstance(e, exp.Alias): 5577 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5578 5579 if not isinstance(e, exp.PropertyEQ): 5580 e = self.expression( 5581 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5582 ) 5583 5584 if isinstance(e.this, exp.Column): 5585 e.this.replace(e.this.this) 5586 else: 5587 e = self._to_prop_eq(e, index) 5588 5589 transformed.append(e) 5590 5591 return transformed 5592 5593 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5594 return self._parse_statement() 5595 5596 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5597 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5598 5599 def _parse_user_defined_function( 5600 self, kind: t.Optional[TokenType] = None 5601 ) -> t.Optional[exp.Expression]: 5602 this = self._parse_id_var() 5603 5604 while self._match(TokenType.DOT): 5605 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5606 5607 if not self._match(TokenType.L_PAREN): 5608 return this 5609 5610 expressions = self._parse_csv(self._parse_function_parameter) 5611 self._match_r_paren() 5612 return self.expression( 5613 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5614 ) 5615 5616 def _parse_introducer(self, 
token: Token) -> exp.Introducer | exp.Identifier: 5617 literal = self._parse_primary() 5618 if literal: 5619 return self.expression(exp.Introducer, this=token.text, expression=literal) 5620 5621 return self.expression(exp.Identifier, this=token.text) 5622 5623 def _parse_session_parameter(self) -> exp.SessionParameter: 5624 kind = None 5625 this = self._parse_id_var() or self._parse_primary() 5626 5627 if this and self._match(TokenType.DOT): 5628 kind = this.name 5629 this = self._parse_var() or self._parse_primary() 5630 5631 return self.expression(exp.SessionParameter, this=this, kind=kind) 5632 5633 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5634 return self._parse_id_var() 5635 5636 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5637 index = self._index 5638 5639 if self._match(TokenType.L_PAREN): 5640 expressions = t.cast( 5641 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5642 ) 5643 5644 if not self._match(TokenType.R_PAREN): 5645 self._retreat(index) 5646 else: 5647 expressions = [self._parse_lambda_arg()] 5648 5649 if self._match_set(self.LAMBDAS): 5650 return self.LAMBDAS[self._prev.token_type](self, expressions) 5651 5652 self._retreat(index) 5653 5654 this: t.Optional[exp.Expression] 5655 5656 if self._match(TokenType.DISTINCT): 5657 this = self.expression( 5658 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5659 ) 5660 else: 5661 this = self._parse_select_or_expression(alias=alias) 5662 5663 return self._parse_limit( 5664 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5665 ) 5666 5667 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5668 index = self._index 5669 if not self._match(TokenType.L_PAREN): 5670 return this 5671 5672 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5673 # expr can be of both types 5674 if self._match_set(self.SELECT_START_TOKENS): 5675 self._retreat(index) 5676 return this 5677 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5678 self._match_r_paren() 5679 return self.expression(exp.Schema, this=this, expressions=args) 5680 5681 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5682 return self._parse_column_def(self._parse_field(any_token=True)) 5683 5684 def _parse_column_def( 5685 self, this: t.Optional[exp.Expression], computed_column: bool = True 5686 ) -> t.Optional[exp.Expression]: 5687 # column defs are not really columns, they're identifiers 5688 if isinstance(this, exp.Column): 5689 this = this.this 5690 5691 if not computed_column: 5692 self._match(TokenType.ALIAS) 5693 5694 kind = self._parse_types(schema=True) 5695 5696 if self._match_text_seq("FOR", "ORDINALITY"): 5697 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5698 5699 constraints: t.List[exp.Expression] = [] 5700 5701 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5702 ("ALIAS", "MATERIALIZED") 5703 ): 5704 persisted = self._prev.text.upper() == "MATERIALIZED" 5705 constraint_kind = exp.ComputedColumnConstraint( 5706 this=self._parse_assignment(), 5707 persisted=persisted or self._match_text_seq("PERSISTED"), 5708 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5709 ) 5710 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5711 elif ( 5712 kind 5713 and self._match(TokenType.ALIAS, advance=False) 5714 and ( 5715 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5716 or (self._next and self._next.token_type == TokenType.L_PAREN) 5717 ) 5718 ): 5719 self._advance() 5720 constraints.append( 5721 self.expression( 5722 exp.ColumnConstraint, 5723 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5724 ) 5725 ) 5726 5727 while True: 5728 constraint = self._parse_column_constraint() 5729 if not constraint: 5730 break 5731 constraints.append(constraint) 5732 5733 if not kind and not constraints: 5734 return this 5735 5736 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5737 5738 def _parse_auto_increment( 5739 self, 5740 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5741 start = None 5742 increment = None 5743 5744 if self._match(TokenType.L_PAREN, advance=False): 5745 args = self._parse_wrapped_csv(self._parse_bitwise) 5746 start = seq_get(args, 0) 5747 increment = seq_get(args, 1) 5748 elif self._match_text_seq("START"): 5749 start = self._parse_bitwise() 5750 self._match_text_seq("INCREMENT") 5751 increment = self._parse_bitwise() 5752 5753 if start and increment: 5754 return exp.GeneratedAsIdentityColumnConstraint( 5755 start=start, increment=increment, this=False 5756 ) 5757 5758 return exp.AutoIncrementColumnConstraint() 5759 5760 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5761 if not self._match_text_seq("REFRESH"): 5762 self._retreat(self._index - 1) 5763 return None 5764 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5765 5766 def _parse_compress(self) -> exp.CompressColumnConstraint: 5767 if self._match(TokenType.L_PAREN, advance=False): 5768 return self.expression( 5769 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5770 ) 5771 5772 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5773 5774 def 
_parse_generated_as_identity( 5775 self, 5776 ) -> ( 5777 exp.GeneratedAsIdentityColumnConstraint 5778 | exp.ComputedColumnConstraint 5779 | exp.GeneratedAsRowColumnConstraint 5780 ): 5781 if self._match_text_seq("BY", "DEFAULT"): 5782 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5783 this = self.expression( 5784 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5785 ) 5786 else: 5787 self._match_text_seq("ALWAYS") 5788 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5789 5790 self._match(TokenType.ALIAS) 5791 5792 if self._match_text_seq("ROW"): 5793 start = self._match_text_seq("START") 5794 if not start: 5795 self._match(TokenType.END) 5796 hidden = self._match_text_seq("HIDDEN") 5797 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5798 5799 identity = self._match_text_seq("IDENTITY") 5800 5801 if self._match(TokenType.L_PAREN): 5802 if self._match(TokenType.START_WITH): 5803 this.set("start", self._parse_bitwise()) 5804 if self._match_text_seq("INCREMENT", "BY"): 5805 this.set("increment", self._parse_bitwise()) 5806 if self._match_text_seq("MINVALUE"): 5807 this.set("minvalue", self._parse_bitwise()) 5808 if self._match_text_seq("MAXVALUE"): 5809 this.set("maxvalue", self._parse_bitwise()) 5810 5811 if self._match_text_seq("CYCLE"): 5812 this.set("cycle", True) 5813 elif self._match_text_seq("NO", "CYCLE"): 5814 this.set("cycle", False) 5815 5816 if not identity: 5817 this.set("expression", self._parse_range()) 5818 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5819 args = self._parse_csv(self._parse_bitwise) 5820 this.set("start", seq_get(args, 0)) 5821 this.set("increment", seq_get(args, 1)) 5822 5823 self._match_r_paren() 5824 5825 return this 5826 5827 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5828 self._match_text_seq("LENGTH") 5829 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5830 5831 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5832 if self._match_text_seq("NULL"): 5833 return self.expression(exp.NotNullColumnConstraint) 5834 if self._match_text_seq("CASESPECIFIC"): 5835 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5836 if self._match_text_seq("FOR", "REPLICATION"): 5837 return self.expression(exp.NotForReplicationColumnConstraint) 5838 5839 # Unconsume the `NOT` token 5840 self._retreat(self._index - 1) 5841 return None 5842 5843 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5844 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5845 5846 procedure_option_follows = ( 5847 self._match(TokenType.WITH, advance=False) 5848 and self._next 5849 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5850 ) 5851 5852 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5853 return self.expression( 5854 exp.ColumnConstraint, 5855 this=this, 5856 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5857 ) 5858 5859 return this 5860 5861 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5862 if not self._match(TokenType.CONSTRAINT): 5863 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5864 5865 return self.expression( 5866 exp.Constraint, 5867 this=self._parse_id_var(), 5868 expressions=self._parse_unnamed_constraints(), 5869 ) 5870 5871 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5872 constraints = [] 5873 while True: 5874 
    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()
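
    # Illustrative sketch (not part of the parser): referential actions such as
    # ON DELETE CASCADE are consumed by _parse_key_constraint_options or by the ON
    # loop in _parse_foreign_key above, depending on where they appear. Assuming a
    # standard sqlglot install, the action should survive a round trip:
    #
    #   import sqlglot
    #   sql = "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES p (id) ON DELETE CASCADE)"
    #   print(sqlglot.parse_one(sql).sql())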
    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())

        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)
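
    # Illustrative sketch (not part of the parser): _parse_bracket above normalizes
    # subscripts with apply_index_offset, which is what lets 1-based and 0-based
    # dialects transpile cleanly. Assuming a standard sqlglot install:
    #
    #   import sqlglot
    #   # Presto array indexing is 1-based while Spark's is 0-based, so the subscript
    #   # should be shifted during transpilation:
    #   print(sqlglot.transpile("SELECT x[1]", read="presto", write="spark")[0])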
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
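
    # Illustrative sketch (not part of the parser): per _parse_cast above, a CAST with
    # a FORMAT clause on a temporal type is rewritten into StrToDate/StrToTime rather
    # than a plain Cast. Assuming a standard sqlglot install (dialect support for the
    # FORMAT clause may vary):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("CAST('2024-01-01' AS DATE FORMAT 'yyyy-mm-dd')")
    #   # isinstance(ast, sqlglot.exp.StrToDate) should be True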
    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())

            namespaces.append(self.expression(exp.XMLNamespace, this=uri))

            if not self._match(TokenType.COMMA):
                break

        return namespaces
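
    # Illustrative sketch (not part of the parser): _parse_string_agg above folds the
    # Postgres/BigQuery/Trino variants into a single exp.GroupConcat node, which is
    # what makes round trips like this possible (assuming a standard sqlglot install):
    #
    #   import sqlglot
    #   sql = "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t"
    #   print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
    #   # should emit a GROUP_CONCAT(x ORDER BY y SEPARATOR ',') form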
6343 """ 6344 args = self._parse_csv(self._parse_assignment) 6345 6346 if len(args) < 3: 6347 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6348 6349 expression, *expressions = args 6350 if not expression: 6351 return None 6352 6353 ifs = [] 6354 for search, result in zip(expressions[::2], expressions[1::2]): 6355 if not search or not result: 6356 return None 6357 6358 if isinstance(search, exp.Literal): 6359 ifs.append( 6360 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6361 ) 6362 elif isinstance(search, exp.Null): 6363 ifs.append( 6364 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6365 ) 6366 else: 6367 cond = exp.or_( 6368 exp.EQ(this=expression.copy(), expression=search), 6369 exp.and_( 6370 exp.Is(this=expression.copy(), expression=exp.Null()), 6371 exp.Is(this=search.copy(), expression=exp.Null()), 6372 copy=False, 6373 ), 6374 copy=False, 6375 ) 6376 ifs.append(exp.If(this=cond, true=result)) 6377 6378 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6379 6380 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6381 self._match_text_seq("KEY") 6382 key = self._parse_column() 6383 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6384 self._match_text_seq("VALUE") 6385 value = self._parse_bitwise() 6386 6387 if not key and not value: 6388 return None 6389 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6390 6391 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6392 if not this or not self._match_text_seq("FORMAT", "JSON"): 6393 return this 6394 6395 return self.expression(exp.FormatJson, this=this) 6396 6397 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6398 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6399 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6400 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6401 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6402 else: 6403 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6404 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6405 6406 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6407 6408 if not empty and not error and not null: 6409 return None 6410 6411 return self.expression( 6412 exp.OnCondition, 6413 empty=empty, 6414 error=error, 6415 null=null, 6416 ) 6417 6418 def _parse_on_handling( 6419 self, on: str, *values: str 6420 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6421 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6422 for value in values: 6423 if self._match_text_seq(value, "ON", on): 6424 return f"{value} ON {on}" 6425 6426 index = self._index 6427 if self._match(TokenType.DEFAULT): 6428 default_value = self._parse_bitwise() 6429 if self._match_text_seq("ON", on): 6430 return default_value 6431 6432 self._retreat(index) 6433 6434 return None 6435 6436 @t.overload 6437 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6438 6439 @t.overload 6440 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
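
    # Illustrative sketch (not part of the parser): _parse_substring above normalizes
    # Postgres' keyword form into ordinary positional arguments. Assuming a standard
    # sqlglot install:
    #
    #   import sqlglot
    #   sql = "SELECT SUBSTRING('hello' FROM 2 FOR 3)"
    #   print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
    #   # should emit SELECT SUBSTRING('hello', 2, 3)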
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
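
    # Illustrative sketch (not part of the parser): _parse_window below re-homes a
    # trailing IGNORE/RESPECT NULLS so that both accepted placements produce the same
    # tree. Assuming a standard sqlglot install:
    #
    #   import sqlglot
    #   a = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
    #   b = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
    #   # a == b should hold, since both normalize to IgnoreNulls wrapping the window function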
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
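
    # Illustrative sketch (not part of the parser): _parse_window_spec above reduces a
    # frame bound to either a keyword string or an expression. Assuming a standard
    # sqlglot install:
    #
    #   import sqlglot
    #   sql = "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM t"
    #   spec = sqlglot.parse_one(sql).find(sqlglot.exp.WindowSpec)
    #   # spec.args["start"] should be the literal 2 with start_side "PRECEDING", while
    #   # spec.args["end"] should be the plain string "CURRENT ROW"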
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)

        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)

        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)

        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )

        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)
    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")

        parse_result = parse_method()

        if wrapped:
            self._match_r_paren()

        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))

            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
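
    # Illustrative sketch (not part of the parser): _parse_transaction above collects
    # space-separated mode words into strings. Assuming a standard sqlglot install
    # (dialects vary; e.g. T-SQL treats BEGIN blocks differently):
    #
    #   import sqlglot
    #   tx = sqlglot.parse_one("BEGIN")
    #   # tx should be an exp.Transaction node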
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
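
    # Illustrative sketch (not part of the parser): _parse_add_column above also picks
    # up MySQL/Databricks-style column placement. Assuming a standard sqlglot install:
    #
    #   import sqlglot
    #   sql = "ALTER TABLE t ADD COLUMN c INT AFTER b"
    #   pos = sqlglot.parse_one(sql, read="mysql").find(sqlglot.exp.ColumnPosition)
    #   # pos.args.get("position") should be "AFTER"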
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
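
    # Illustrative sketch (not part of the parser): the SET DATA TYPE fallthrough in
    # _parse_alter_table_alter above also captures Postgres' USING clause. Assuming a
    # standard sqlglot install:
    #
    #   import sqlglot
    #   sql = "ALTER TABLE t ALTER COLUMN c SET DATA TYPE INT USING c::INT"
    #   ac = sqlglot.parse_one(sql, read="postgres").find(sqlglot.exp.AlterColumn)
    #   # ac.args.get("dtype") and ac.args.get("using") should both be populated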
    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind: https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None

        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None
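
    # Illustrative sketch (not part of the parser): _parse_analyze above serves several
    # dialects' ANALYZE statements. Assuming a standard sqlglot install:
    #
    #   import sqlglot
    #   node = sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark")
    #   # node should be an exp.Analyze with kind "TABLE" and an AnalyzeStatistics child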
    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")

            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
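
    # Illustrative sketch (not part of the parser): _parse_merge above defers the WHEN
    # branches to _parse_when_matched below. Assuming a standard sqlglot install:
    #
    #   import sqlglot
    #   sql = """
    #       MERGE INTO t USING s ON t.id = s.id
    #       WHEN MATCHED THEN UPDATE SET t.v = s.v
    #       WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
    #   """
    #   merge = sqlglot.parse_one(sql)
    #   # merge.args["whens"] should be an exp.Whens wrapping two exp.When nodes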
self._parse_csv(self._parse_equality), 7475 ) 7476 elif self._match(TokenType.DELETE): 7477 then = self.expression(exp.Var, this=self._prev.text) 7478 else: 7479 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7480 7481 whens.append( 7482 self.expression( 7483 exp.When, 7484 matched=matched, 7485 source=source, 7486 condition=condition, 7487 then=then, 7488 ) 7489 ) 7490 return self.expression(exp.Whens, expressions=whens) 7491 7492 def _parse_show(self) -> t.Optional[exp.Expression]: 7493 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7494 if parser: 7495 return parser(self) 7496 return self._parse_as_command(self._prev) 7497 7498 def _parse_set_item_assignment( 7499 self, kind: t.Optional[str] = None 7500 ) -> t.Optional[exp.Expression]: 7501 index = self._index 7502 7503 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7504 return self._parse_set_transaction(global_=kind == "GLOBAL") 7505 7506 left = self._parse_primary() or self._parse_column() 7507 assignment_delimiter = self._match_texts(("=", "TO")) 7508 7509 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7510 self._retreat(index) 7511 return None 7512 7513 right = self._parse_statement() or self._parse_id_var() 7514 if isinstance(right, (exp.Column, exp.Identifier)): 7515 right = exp.var(right.name) 7516 7517 this = self.expression(exp.EQ, this=left, expression=right) 7518 return self.expression(exp.SetItem, this=this, kind=kind) 7519 7520 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7521 self._match_text_seq("TRANSACTION") 7522 characteristics = self._parse_csv( 7523 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7524 ) 7525 return self.expression( 7526 exp.SetItem, 7527 expressions=characteristics, 7528 kind="TRANSACTION", 7529 **{"global": global_}, # type: ignore 7530 ) 7531 7532 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7533 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7534 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7535 7536 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7537 index = self._index 7538 set_ = self.expression( 7539 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7540 ) 7541 7542 if self._curr: 7543 self._retreat(index) 7544 return self._parse_as_command(self._prev) 7545 7546 return set_ 7547 7548 def _parse_var_from_options( 7549 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7550 ) -> t.Optional[exp.Var]: 7551 start = self._curr 7552 if not start: 7553 return None 7554 7555 option = start.text.upper() 7556 continuations = options.get(option) 7557 7558 index = self._index 7559 self._advance() 7560 for keywords in continuations or []: 7561 if isinstance(keywords, str): 7562 keywords = (keywords,) 7563 7564 if self._match_text_seq(*keywords): 7565 option = f"{option} {' '.join(keywords)}" 7566 break 7567 else: 7568 if continuations or continuations is None: 7569 if raise_unmatched: 7570 self.raise_error(f"Unknown option {option}") 7571 7572 self._retreat(index) 7573 return None 7574 7575 return exp.var(option) 7576 7577 def _parse_as_command(self, start: Token) -> exp.Command: 7578 while self._curr: 7579 self._advance() 7580 text = self._find_sql(start, self._prev) 7581 size = len(start.text) 7582 self._warn_unsupported() 7583 return exp.Command(this=text[:size], expression=text[size:]) 7584 7585 def _parse_dict_property(self, 
this: str) -> exp.DictProperty: 7586 settings = [] 7587 7588 self._match_l_paren() 7589 kind = self._parse_id_var() 7590 7591 if self._match(TokenType.L_PAREN): 7592 while True: 7593 key = self._parse_id_var() 7594 value = self._parse_primary() 7595 if not key and value is None: 7596 break 7597 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7598 self._match(TokenType.R_PAREN) 7599 7600 self._match_r_paren() 7601 7602 return self.expression( 7603 exp.DictProperty, 7604 this=this, 7605 kind=kind.this if kind else None, 7606 settings=settings, 7607 ) 7608 7609 def _parse_dict_range(self, this: str) -> exp.DictRange: 7610 self._match_l_paren() 7611 has_min = self._match_text_seq("MIN") 7612 if has_min: 7613 min = self._parse_var() or self._parse_primary() 7614 self._match_text_seq("MAX") 7615 max = self._parse_var() or self._parse_primary() 7616 else: 7617 max = self._parse_var() or self._parse_primary() 7618 min = exp.Literal.number(0) 7619 self._match_r_paren() 7620 return self.expression(exp.DictRange, this=this, min=min, max=max) 7621 7622 def _parse_comprehension( 7623 self, this: t.Optional[exp.Expression] 7624 ) -> t.Optional[exp.Comprehension]: 7625 index = self._index 7626 expression = self._parse_column() 7627 if not self._match(TokenType.IN): 7628 self._retreat(index - 1) 7629 return None 7630 iterator = self._parse_column() 7631 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7632 return self.expression( 7633 exp.Comprehension, 7634 this=this, 7635 expression=expression, 7636 iterator=iterator, 7637 condition=condition, 7638 ) 7639 7640 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7641 if self._match(TokenType.HEREDOC_STRING): 7642 return self.expression(exp.Heredoc, this=self._prev.text) 7643 7644 if not self._match_text_seq("$"): 7645 return None 7646 7647 tags = ["$"] 7648 tag_text = None 7649 7650 if self._is_connected(): 7651 self._advance() 7652 tags.append(self._prev.text.upper()) 7653 else: 7654 self.raise_error("No closing $ found") 7655 7656 if tags[-1] != "$": 7657 if self._is_connected() and self._match_text_seq("$"): 7658 tag_text = tags[-1] 7659 tags.append("$") 7660 else: 7661 self.raise_error("No closing $ found") 7662 7663 heredoc_start = self._curr 7664 7665 while self._curr: 7666 if self._match_text_seq(*tags, advance=False): 7667 this = self._find_sql(heredoc_start, self._prev) 7668 self._advance(len(tags)) 7669 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7670 7671 self._advance() 7672 7673 self.raise_error(f"No closing {''.join(tags)} found") 7674 return None 7675 7676 def _find_parser( 7677 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7678 ) -> t.Optional[t.Callable]: 7679 if not self._curr: 7680 return None 7681 7682 index = self._index 7683 this = [] 7684 while True: 7685 # The current token might be multiple words 7686 curr = self._curr.text.upper() 7687 key = curr.split(" ") 7688 this.append(curr) 7689 7690 self._advance() 7691 result, trie = in_trie(trie, key) 7692 if result == TrieResult.FAILED: 7693 break 7694 7695 if result == TrieResult.EXISTS: 7696 subparser = parsers[" ".join(this)] 7697 return subparser 7698 7699 self._retreat(index) 7700 return None 7701 7702 def _match(self, token_type, advance=True, expression=None): 7703 if not self._curr: 7704 return None 7705 7706 if self._curr.token_type == token_type: 7707 if advance: 7708 self._advance() 7709 self._add_comments(expression) 7710 return True 7711 7712 return None 7713 7714 def _match_set(self, types, 
advance=True): 7715 if not self._curr: 7716 return None 7717 7718 if self._curr.token_type in types: 7719 if advance: 7720 self._advance() 7721 return True 7722 7723 return None 7724 7725 def _match_pair(self, token_type_a, token_type_b, advance=True): 7726 if not self._curr or not self._next: 7727 return None 7728 7729 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7730 if advance: 7731 self._advance(2) 7732 return True 7733 7734 return None 7735 7736 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7737 if not self._match(TokenType.L_PAREN, expression=expression): 7738 self.raise_error("Expecting (") 7739 7740 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7741 if not self._match(TokenType.R_PAREN, expression=expression): 7742 self.raise_error("Expecting )") 7743 7744 def _match_texts(self, texts, advance=True): 7745 if ( 7746 self._curr 7747 and self._curr.token_type != TokenType.STRING 7748 and self._curr.text.upper() in texts 7749 ): 7750 if advance: 7751 self._advance() 7752 return True 7753 return None 7754 7755 def _match_text_seq(self, *texts, advance=True): 7756 index = self._index 7757 for text in texts: 7758 if ( 7759 self._curr 7760 and self._curr.token_type != TokenType.STRING 7761 and self._curr.text.upper() == text 7762 ): 7763 self._advance() 7764 else: 7765 self._retreat(index) 7766 return None 7767 7768 if not advance: 7769 self._retreat(index) 7770 7771 return True 7772 7773 def _replace_lambda( 7774 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7775 ) -> t.Optional[exp.Expression]: 7776 if not node: 7777 return node 7778 7779 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7780 7781 for column in node.find_all(exp.Column): 7782 typ = lambda_types.get(column.parts[0].name) 7783 if typ is not None: 7784 dot_or_id = column.to_dot() if column.table else column.this 7785 7786 if typ: 7787 dot_or_id = self.expression( 7788 exp.Cast, 7789 this=dot_or_id, 7790 to=typ, 7791 ) 7792 7793 parent = column.parent 7794 7795 while isinstance(parent, exp.Dot): 7796 if not isinstance(parent.parent, exp.Dot): 7797 parent.replace(dot_or_id) 7798 break 7799 parent = parent.parent 7800 else: 7801 if column is node: 7802 node = dot_or_id 7803 else: 7804 column.replace(dot_or_id) 7805 return node 7806 7807 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7808 start = self._prev 7809 7810 # Not to be confused with TRUNCATE(number, decimals) function call 7811 if self._match(TokenType.L_PAREN): 7812 self._retreat(self._index - 2) 7813 return self._parse_function() 7814 7815 # Clickhouse supports TRUNCATE DATABASE as well 7816 is_database = self._match(TokenType.DATABASE) 7817 7818 self._match(TokenType.TABLE) 7819 7820 exists = self._parse_exists(not_=False) 7821 7822 expressions = self._parse_csv( 7823 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7824 ) 7825 7826 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7827 7828 if self._match_text_seq("RESTART", "IDENTITY"): 7829 identity = "RESTART" 7830 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7831 identity = "CONTINUE" 7832 else: 7833 identity = None 7834 7835 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7836 option = self._prev.text 7837 else: 7838 option = None 7839 7840 partition = self._parse_partition() 7841 7842 # Fallback case 7843 if self._curr: 7844 return 
self._parse_as_command(start) 7845 7846 return self.expression( 7847 exp.TruncateTable, 7848 expressions=expressions, 7849 is_database=is_database, 7850 exists=exists, 7851 cluster=cluster, 7852 identity=identity, 7853 option=option, 7854 partition=partition, 7855 ) 7856 7857 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7858 this = self._parse_ordered(self._parse_opclass) 7859 7860 if not self._match(TokenType.WITH): 7861 return this 7862 7863 op = self._parse_var(any_token=True) 7864 7865 return self.expression(exp.WithOperator, this=this, op=op) 7866 7867 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7868 self._match(TokenType.EQ) 7869 self._match(TokenType.L_PAREN) 7870 7871 opts: t.List[t.Optional[exp.Expression]] = [] 7872 while self._curr and not self._match(TokenType.R_PAREN): 7873 if self._match_text_seq("FORMAT_NAME", "="): 7874 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7875 # so we parse it separately to use _parse_field() 7876 prop = self.expression( 7877 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7878 ) 7879 opts.append(prop) 7880 else: 7881 opts.append(self._parse_property()) 7882 7883 self._match(TokenType.COMMA) 7884 7885 return opts 7886 7887 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7888 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7889 7890 options = [] 7891 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7892 option = self._parse_var(any_token=True) 7893 prev = self._prev.text.upper() 7894 7895 # Different dialects might separate options and values by white space, "=" and "AS" 7896 self._match(TokenType.EQ) 7897 self._match(TokenType.ALIAS) 7898 7899 param = self.expression(exp.CopyParameter, this=option) 7900 7901 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7902 TokenType.L_PAREN, advance=False 7903 ): 7904 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7905 param.set("expressions", self._parse_wrapped_options()) 7906 elif prev == "FILE_FORMAT": 7907 # T-SQL's external file format case 7908 param.set("expression", self._parse_field()) 7909 else: 7910 param.set("expression", self._parse_unquoted_field()) 7911 7912 options.append(param) 7913 self._match(sep) 7914 7915 return options 7916 7917 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7918 expr = self.expression(exp.Credentials) 7919 7920 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7921 expr.set("storage", self._parse_field()) 7922 if self._match_text_seq("CREDENTIALS"): 7923 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7924 creds = ( 7925 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7926 ) 7927 expr.set("credentials", creds) 7928 if self._match_text_seq("ENCRYPTION"): 7929 expr.set("encryption", self._parse_wrapped_options()) 7930 if self._match_text_seq("IAM_ROLE"): 7931 expr.set("iam_role", self._parse_field()) 7932 if self._match_text_seq("REGION"): 7933 expr.set("region", self._parse_field()) 7934 7935 return expr 7936 7937 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7938 return self._parse_field() 7939 7940 def _parse_copy(self) -> exp.Copy | exp.Command: 7941 start = self._prev 7942 7943 self._match(TokenType.INTO) 7944 7945 this = ( 7946 self._parse_select(nested=True, parse_subquery_alias=False) 7947 if self._match(TokenType.L_PAREN, advance=False) 7948 else self._parse_table(schema=True) 7949 ) 7950 7951 
kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7952 7953 files = self._parse_csv(self._parse_file_location) 7954 credentials = self._parse_credentials() 7955 7956 self._match_text_seq("WITH") 7957 7958 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7959 7960 # Fallback case 7961 if self._curr: 7962 return self._parse_as_command(start) 7963 7964 return self.expression( 7965 exp.Copy, 7966 this=this, 7967 kind=kind, 7968 credentials=credentials, 7969 files=files, 7970 params=params, 7971 ) 7972 7973 def _parse_normalize(self) -> exp.Normalize: 7974 return self.expression( 7975 exp.Normalize, 7976 this=self._parse_bitwise(), 7977 form=self._match(TokenType.COMMA) and self._parse_var(), 7978 ) 7979 7980 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 7981 args = self._parse_csv(lambda: self._parse_lambda()) 7982 7983 this = seq_get(args, 0) 7984 decimals = seq_get(args, 1) 7985 7986 return expr_type( 7987 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 7988 ) 7989 7990 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7991 if self._match_text_seq("COLUMNS", "(", advance=False): 7992 this = self._parse_function() 7993 if isinstance(this, exp.Columns): 7994 this.set("unpack", True) 7995 return this 7996 7997 return self.expression( 7998 exp.Star, 7999 **{ # type: ignore 8000 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8001 "replace": self._parse_star_op("REPLACE"), 8002 "rename": self._parse_star_op("RENAME"), 8003 }, 8004 ) 8005 8006 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8007 privilege_parts = [] 8008 8009 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8010 # (end of privilege list) or L_PAREN (start of column list) are met 8011 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8012 privilege_parts.append(self._curr.text.upper()) 8013 self._advance() 8014 8015 this = exp.var(" ".join(privilege_parts)) 8016 expressions = ( 8017 self._parse_wrapped_csv(self._parse_column) 8018 if self._match(TokenType.L_PAREN, advance=False) 8019 else None 8020 ) 8021 8022 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8023 8024 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8025 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8026 principal = self._parse_id_var() 8027 8028 if not principal: 8029 return None 8030 8031 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8032 8033 def _parse_grant(self) -> exp.Grant | exp.Command: 8034 start = self._prev 8035 8036 privileges = self._parse_csv(self._parse_grant_privilege) 8037 8038 self._match(TokenType.ON) 8039 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8040 8041 # Attempt to parse the securable e.g. 
MySQL allows names 8042 # such as "foo.*", "*.*" which are not easily parseable yet 8043 securable = self._try_parse(self._parse_table_parts) 8044 8045 if not securable or not self._match_text_seq("TO"): 8046 return self._parse_as_command(start) 8047 8048 principals = self._parse_csv(self._parse_grant_principal) 8049 8050 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8051 8052 if self._curr: 8053 return self._parse_as_command(start) 8054 8055 return self.expression( 8056 exp.Grant, 8057 privileges=privileges, 8058 kind=kind, 8059 securable=securable, 8060 principals=principals, 8061 grant_option=grant_option, 8062 ) 8063 8064 def _parse_overlay(self) -> exp.Overlay: 8065 return self.expression( 8066 exp.Overlay, 8067 **{ # type: ignore 8068 "this": self._parse_bitwise(), 8069 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8070 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8071 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8072 }, 8073 )
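The statement parsers above are normally reached through sqlglot's public entry points rather than called directly. As a rough illustration (a sketch against the public API; exact node types for less common statements can vary between sqlglot versions), MERGE routes through _parse_merge and _parse_when_matched, TRUNCATE through _parse_truncate_table, and anything the parser cannot fully model falls back to exp.Command via _parse_as_command:

import sqlglot
from sqlglot import exp

# MERGE is assembled by _parse_merge and _parse_when_matched
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.v = s.v"
)
assert isinstance(merge, exp.Merge)

# TRUNCATE TABLE is handled by _parse_truncate_table
truncate = sqlglot.parse_one("TRUNCATE TABLE t1, t2 CASCADE")
assert isinstance(truncate, exp.TruncateTable)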
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
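A constructor sketch (the dialect name and limits here are arbitrary illustrative choices, not defaults): collect several errors and raise them together instead of failing on the first one.

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Raise one ParseError containing up to 10 recorded errors
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=10, dialect="snowflake")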
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
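For example (using the default dialect's Tokenizer; semicolons separate statements, so each statement yields its own tree):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)
expressions = Parser().parse(tokens, sql)
assert len(expressions) == 2  # one syntax tree per statement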
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
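For example, parsing a dotted name directly into a table reference (a sketch; exp.Table is one of the types registered in EXPRESSION_PARSERS, and this mirrors what the top-level sqlglot.parse_one(sql, into=exp.Table) does under the hood):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "db.schema.tbl"
table = Parser().parse_into(exp.Table, Tokenizer().tokenize(sql), sql)[0]
assert isinstance(table, exp.Table)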
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
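Parsing invokes check_errors once it finishes, so with ErrorLevel.WARN a malformed statement is logged rather than raised, and the recorded errors stay available for inspection afterwards. A sketch:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM (SELECT 1"  # intentionally malformed: unbalanced paren
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)  # errors are logged, not raised
assert parser.errors  # the recorded ParseError instances remain on the parser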
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
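Under the default ErrorLevel.IMMEDIATE, raise_error fires on the first problem, and the resulting ParseError message embeds the line, column, and highlighted context computed above. A sketch:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM (SELECT 1"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    print(e)  # includes "Line ..., Col: ..." plus the surrounding query context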
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
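A minimal sketch of building a validated node by hand, much as the internal _parse_* methods do (exp.to_identifier is a public helper from sqlglot.exp):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
column = parser.expression(exp.Column, this=exp.to_identifier("x"))
assert column.sql() == "x"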
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
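Because validation funnels through raise_error, the configured error level decides what happens when a mandatory argument is missing. A sketch, assuming exp.Cast (whose "to" argument is required):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.WARN)
cast = exp.Cast(this=exp.column("x"))  # required "to" argument is missing
parser.validate_expression(cast)
assert parser.errors  # the missing argument was recorded instead of raised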