sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
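
# A hedged sketch of what the Paren wrap in build_mod buys (illustrative; exact
# output depends on the active dialect):
#
#     import sqlglot
#     sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     # -> 'SELECT (a + 1) % 7'; without the wrap it would regenerate as
#     # a + 1 % 7, which binds differently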


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
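
# The metaclass above precomputes word tries so multi-word SHOW/SET commands can
# be matched token by token. A sketch of the resulting shape (assuming
# new_trie's convention of marking complete entries with a sentinel key):
#
#     from sqlglot.trie import new_trie
#     new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
#     # -> roughly {"SHOW": {"TABLES": {0: True}, "COLUMNS": {0: True}}}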


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
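
    # Builders registered in FUNCTIONS receive the parsed argument list, and some
    # also take the active dialect (note the `dialect` parameter above), which is
    # how flags like LOG_BASE_FIRST or STRICT_STRING_CONCAT shape the tree. An
    # illustrative sketch:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT LOG(2, 8)")  # -> exp.Log(this=2, expression=8)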

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS
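
    # The set arithmetic above determines what may serve as an identifier in a
    # given position. For instance, WINDOW stays in ALIAS_TOKENS but is removed
    # from TABLE_ALIAS_TOKENS, so (an illustrative sketch, default dialect):
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT 1 AS window")  # OK: WINDOW can alias a column
    #     # ...whereas WINDOW after a table reference is read as the start of a
    #     # WINDOW clause rather than swallowed as a table alias.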

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
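
    # COLUMN_OPERATORS maps postfix operators on a column to expression builders.
    # A hedged sketch for the JSON arrows and casts (default dialect):
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT c -> '$.a'")   # -> exp.JSONExtract
    #     sqlglot.parse_one("SELECT c ->> '$.a'")  # -> exp.JSONExtractScalar
    #     sqlglot.parse_one("SELECT c::INT")       # -> exp.Cast (STRICT_CAST here)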

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
"GLOBAL": lambda self: self.expression(exp.GlobalProperty), 975 "HEAP": lambda self: self.expression(exp.HeapProperty), 976 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 977 "IMMUTABLE": lambda self: self.expression( 978 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 979 ), 980 "INHERITS": lambda self: self.expression( 981 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 982 ), 983 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 984 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 985 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 986 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 987 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 988 "LIKE": lambda self: self._parse_create_like(), 989 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 990 "LOCK": lambda self: self._parse_locking(), 991 "LOCKING": lambda self: self._parse_locking(), 992 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 993 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 994 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 995 "MODIFIES": lambda self: self._parse_modifies_property(), 996 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 997 "NO": lambda self: self._parse_no_property(), 998 "ON": lambda self: self._parse_on_property(), 999 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1000 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1001 "PARTITION": lambda self: self._parse_partitioned_of(), 1002 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1003 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1004 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1005 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1006 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1007 "READS": lambda self: self._parse_reads_property(), 1008 "REMOTE": lambda self: self._parse_remote_with_connection(), 1009 "RETURNS": lambda self: self._parse_returns(), 1010 "STRICT": lambda self: self.expression(exp.StrictProperty), 1011 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1012 "ROW": lambda self: self._parse_row(), 1013 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1014 "SAMPLE": lambda self: self.expression( 1015 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1016 ), 1017 "SECURE": lambda self: self.expression(exp.SecureProperty), 1018 "SECURITY": lambda self: self._parse_security(), 1019 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1020 "SETTINGS": lambda self: self._parse_settings_property(), 1021 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1022 "SORTKEY": lambda self: self._parse_sortkey(), 1023 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1024 "STABLE": lambda self: self.expression( 1025 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1026 ), 1027 "STORED": lambda self: self._parse_stored(), 1028 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1029 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1030 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1031 "TEMPORARY": lambda self: 

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their
            # arguments are in the right order:
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
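
    # A sketch of the canonicalization above: both argument orders converge on
    # the column-first form, e.g. (pseudo-SQL, per the referenced Athena docs):
    #
    #     PARTITIONED BY (bucket(16, c))  -- Hive order: bucket count first
    #     PARTITION BY (bucket(c, 16))    -- Trino order: column first
    #     -- both produce exp.PartitionedByBucket(this=c, expression=16)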
"UNIQUE": lambda self: self._parse_unique(), 1104 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1105 "WATERMARK": lambda self: self.expression( 1106 exp.WatermarkColumnConstraint, 1107 this=self._match(TokenType.FOR) and self._parse_column(), 1108 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1109 ), 1110 "WITH": lambda self: self.expression( 1111 exp.Properties, expressions=self._parse_wrapped_properties() 1112 ), 1113 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1114 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1115 } 1116 1117 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1118 klass = ( 1119 exp.PartitionedByBucket 1120 if self._prev.text.upper() == "BUCKET" 1121 else exp.PartitionByTruncate 1122 ) 1123 1124 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1125 this, expression = seq_get(args, 0), seq_get(args, 1) 1126 1127 if isinstance(this, exp.Literal): 1128 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1129 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1130 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1131 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1132 # 1133 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1134 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1135 this, expression = expression, this 1136 1137 return self.expression(klass, this=this, expression=expression) 1138 1139 ALTER_PARSERS = { 1140 "ADD": lambda self: self._parse_alter_table_add(), 1141 "AS": lambda self: self._parse_select(), 1142 "ALTER": lambda self: self._parse_alter_table_alter(), 1143 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1144 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1145 "DROP": lambda self: self._parse_alter_table_drop(), 1146 "RENAME": lambda self: self._parse_alter_table_rename(), 1147 "SET": lambda self: self._parse_alter_table_set(), 1148 "SWAP": lambda self: self.expression( 1149 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1150 ), 1151 } 1152 1153 ALTER_ALTER_PARSERS = { 1154 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1155 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1156 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1157 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1158 } 1159 1160 SCHEMA_UNNAMED_CONSTRAINTS = { 1161 "CHECK", 1162 "EXCLUDE", 1163 "FOREIGN KEY", 1164 "LIKE", 1165 "PERIOD", 1166 "PRIMARY KEY", 1167 "UNIQUE", 1168 "WATERMARK", 1169 "BUCKET", 1170 "TRUNCATE", 1171 } 1172 1173 NO_PAREN_FUNCTION_PARSERS = { 1174 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1175 "CASE": lambda self: self._parse_case(), 1176 "CONNECT_BY_ROOT": lambda self: self.expression( 1177 exp.ConnectByRoot, this=self._parse_column() 1178 ), 1179 "IF": lambda self: self._parse_if(), 1180 } 1181 1182 INVALID_FUNC_NAME_TOKENS = { 1183 TokenType.IDENTIFIER, 1184 TokenType.STRING, 1185 } 1186 1187 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1188 1189 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1190 1191 

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }
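
    # OPTIONS_TYPE tables map a leading keyword to the word sequences allowed to
    # follow it: a nested tuple is a multi-word continuation, a bare string a
    # single-word alternative. E.g. the table above accepts
    # "ISOLATION LEVEL READ UNCOMMITTED" and "READ ONLY" (a sketch of the
    # convention, not an exhaustive grammar).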

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
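
    # A minimal usage sketch (most callers go through sqlglot.parse /
    # sqlglot.parse_one rather than instantiating Parser directly):
    #
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "SELECT 1; SELECT 2"
    #     trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    #     # -> one syntax tree per semicolon-separated statement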

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees, each an instance of the target type.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
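
    # A minimal parse_into sketch: the target type is looked up in
    # EXPRESSION_PARSERS, and alternatives are tried in order until one parses:
    #
    #     from sqlglot import exp
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "a = 1"
    #     condition = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql=sql)[0]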
1688 """ 1689 instance = exp_class(**kwargs) 1690 instance.add_comments(comments) if comments else self._add_comments(instance) 1691 return self.validate_expression(instance) 1692 1693 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1694 if expression and self._prev_comments: 1695 expression.add_comments(self._prev_comments) 1696 self._prev_comments = None 1697 1698 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1699 """ 1700 Validates an Expression, making sure that all its mandatory arguments are set. 1701 1702 Args: 1703 expression: The expression to validate. 1704 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1705 1706 Returns: 1707 The validated expression. 1708 """ 1709 if self.error_level != ErrorLevel.IGNORE: 1710 for error_message in expression.error_messages(args): 1711 self.raise_error(error_message) 1712 1713 return expression 1714 1715 def _find_sql(self, start: Token, end: Token) -> str: 1716 return self.sql[start.start : end.end + 1] 1717 1718 def _is_connected(self) -> bool: 1719 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1720 1721 def _advance(self, times: int = 1) -> None: 1722 self._index += times 1723 self._curr = seq_get(self._tokens, self._index) 1724 self._next = seq_get(self._tokens, self._index + 1) 1725 1726 if self._index > 0: 1727 self._prev = self._tokens[self._index - 1] 1728 self._prev_comments = self._prev.comments 1729 else: 1730 self._prev = None 1731 self._prev_comments = None 1732 1733 def _retreat(self, index: int) -> None: 1734 if index != self._index: 1735 self._advance(index - self._index) 1736 1737 def _warn_unsupported(self) -> None: 1738 if len(self._tokens) <= 1: 1739 return 1740 1741 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1742 # interested in emitting a warning for the one being currently processed. 1743 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1744 1745 logger.warning( 1746 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1747 ) 1748 1749 def _parse_command(self) -> exp.Command: 1750 self._warn_unsupported() 1751 return self.expression( 1752 exp.Command, 1753 comments=self._prev_comments, 1754 this=self._prev.text.upper(), 1755 expression=self._parse_string(), 1756 ) 1757 1758 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1759 """ 1760 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1762 solve this by setting & resetting the parser state accordingly. 1763 """ 1764 index = self._index 1765 error_level = self.error_level 1766 1767 self.error_level = ErrorLevel.IMMEDIATE 1768 try: 1769 this = parse_method() 1770 except ParseError: 1771 this = None 1772 finally: 1773 if not this or retreat: 1774 self._retreat(index) 1775 self.error_level = error_level 1776 1777 return this 1778 1779 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1780 start = self._prev 1781 exists = self._parse_exists() if allow_exists else None 1782 1783 self._match(TokenType.ON) 1784 1785 materialized = self._match_text_seq("MATERIALIZED") 1786 kind = self._match_set(self.CREATABLES) and self._prev 1787 if not kind: 1788 return self._parse_as_command(start) 1789 1790 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1791 this = self._parse_user_defined_function(kind=kind.token_type) 1792 elif kind.token_type == TokenType.TABLE: 1793 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1794 elif kind.token_type == TokenType.COLUMN: 1795 this = self._parse_column() 1796 else: 1797 this = self._parse_id_var() 1798 1799 self._match(TokenType.IS) 1800 1801 return self.expression( 1802 exp.Comment, 1803 this=this, 1804 kind=kind.text, 1805 expression=self._parse_string(), 1806 exists=exists, 1807 materialized=materialized, 1808 ) 1809 1810 def _parse_to_table( 1811 self, 1812 ) -> exp.ToTableProperty: 1813 table = self._parse_table_parts(schema=True) 1814 return self.expression(exp.ToTableProperty, this=table) 1815 1816 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1817 def _parse_ttl(self) -> exp.Expression: 1818 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1819 this = self._parse_bitwise() 1820 1821 if self._match_text_seq("DELETE"): 1822 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1823 if self._match_text_seq("RECOMPRESS"): 1824 return self.expression( 1825 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1826 ) 1827 if self._match_text_seq("TO", "DISK"): 1828 return self.expression( 1829 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1830 ) 1831 if self._match_text_seq("TO", "VOLUME"): 1832 return self.expression( 1833 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1834 ) 1835 1836 return this 1837 1838 expressions = self._parse_csv(_parse_ttl_action) 1839 where = self._parse_where() 1840 group = self._parse_group() 1841 1842 aggregates = None 1843 if group and self._match(TokenType.SET): 1844 aggregates = self._parse_csv(self._parse_set_item) 1845 1846 return self.expression( 1847 exp.MergeTreeTTL, 1848 expressions=expressions, 1849 where=where, 1850 group=group, 1851 aggregates=aggregates, 1852 ) 1853 1854 def _parse_statement(self) -> t.Optional[exp.Expression]: 1855 if self._curr is None: 1856 return None 1857 1858 if self._match_set(self.STATEMENT_PARSERS): 1859 comments = self._prev_comments 1860 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1861 stmt.add_comments(comments, prepend=True) 1862 return stmt 1863 1864 if self._match_set(self.dialect.tokenizer.COMMANDS): 1865 return self._parse_command() 1866 1867 expression = self._parse_expression() 1868 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1869 return self._parse_query_modifiers(expression)
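# --------------------------------------------------------------------------
# Illustrative usage sketch (not part of the parser source): the entry points
# above are usually reached through sqlglot.parse/parse_one, but the Parser
# can also be driven directly. The APIs shown exist in sqlglot; the SQL
# strings are arbitrary examples.
#
#     from sqlglot import exp
#     from sqlglot.dialects.dialect import Dialect
#     from sqlglot.errors import ErrorLevel
#
#     dialect = Dialect.get_or_raise("duckdb")
#     parser = dialect.parser(error_level=ErrorLevel.RAISE)
#
#     sql = "SELECT 1; SELECT 2"
#     trees = parser.parse(dialect.tokenize(sql), sql=sql)  # one tree per statement
#
#     # parse_into targets specific expression types, raising if none can be parsed
#     name = "db.schema.tbl"
#     table = parser.parse_into(exp.Table, dialect.tokenize(name), sql=name)[0]
# --------------------------------------------------------------------------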
1870 1871 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1872 start = self._prev 1873 temporary = self._match(TokenType.TEMPORARY) 1874 materialized = self._match_text_seq("MATERIALIZED") 1875 1876 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1877 if not kind: 1878 return self._parse_as_command(start) 1879 1880 concurrently = self._match_text_seq("CONCURRENTLY") 1881 if_exists = exists or self._parse_exists() 1882 1883 if kind == "COLUMN": 1884 this = self._parse_column() 1885 else: 1886 this = self._parse_table_parts( 1887 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1888 ) 1889 1890 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1891 1892 if self._match(TokenType.L_PAREN, advance=False): 1893 expressions = self._parse_wrapped_csv(self._parse_types) 1894 else: 1895 expressions = None 1896 1897 return self.expression( 1898 exp.Drop, 1899 exists=if_exists, 1900 this=this, 1901 expressions=expressions, 1902 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1903 temporary=temporary, 1904 materialized=materialized, 1905 cascade=self._match_text_seq("CASCADE"), 1906 constraints=self._match_text_seq("CONSTRAINTS"), 1907 purge=self._match_text_seq("PURGE"), 1908 cluster=cluster, 1909 concurrently=concurrently, 1910 ) 1911 1912 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1913 return ( 1914 self._match_text_seq("IF") 1915 and (not not_ or self._match(TokenType.NOT)) 1916 and self._match(TokenType.EXISTS) 1917 ) 1918 1919 def _parse_create(self) -> exp.Create | exp.Command: 1920 # Note: this can't be None because we've matched a statement parser 1921 start = self._prev 1922 1923 replace = ( 1924 start.token_type == TokenType.REPLACE 1925 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1926 or self._match_pair(TokenType.OR, TokenType.ALTER) 1927 ) 1928 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1929 1930 unique = self._match(TokenType.UNIQUE) 1931 1932 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1933 clustered = True 1934 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1935 "COLUMNSTORE" 1936 ): 1937 clustered = False 1938 else: 1939 clustered = None 1940 1941 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1942 self._advance() 1943 1944 properties = None 1945 create_token = self._match_set(self.CREATABLES) and self._prev 1946 1947 if not create_token: 1948 # exp.Properties.Location.POST_CREATE 1949 properties = self._parse_properties() 1950 create_token = self._match_set(self.CREATABLES) and self._prev 1951 1952 if not properties or not create_token: 1953 return self._parse_as_command(start) 1954 1955 concurrently = self._match_text_seq("CONCURRENTLY") 1956 exists = self._parse_exists(not_=True) 1957 this = None 1958 expression: t.Optional[exp.Expression] = None 1959 indexes = None 1960 no_schema_binding = None 1961 begin = None 1962 end = None 1963 clone = None 1964 1965 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1966 nonlocal properties 1967 if properties and temp_props: 1968 properties.expressions.extend(temp_props.expressions) 1969 elif temp_props: 1970 properties = temp_props 1971 1972 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1973 this = self._parse_user_defined_function(kind=create_token.token_type) 1974 1975 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1976 
extend_props(self._parse_properties()) 1977 1978 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1979 extend_props(self._parse_properties()) 1980 1981 if not expression: 1982 if self._match(TokenType.COMMAND): 1983 expression = self._parse_as_command(self._prev) 1984 else: 1985 begin = self._match(TokenType.BEGIN) 1986 return_ = self._match_text_seq("RETURN") 1987 1988 if self._match(TokenType.STRING, advance=False): 1989 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1990 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1991 expression = self._parse_string() 1992 extend_props(self._parse_properties()) 1993 else: 1994 expression = self._parse_user_defined_function_expression() 1995 1996 end = self._match_text_seq("END") 1997 1998 if return_: 1999 expression = self.expression(exp.Return, this=expression) 2000 elif create_token.token_type == TokenType.INDEX: 2001 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2002 if not self._match(TokenType.ON): 2003 index = self._parse_id_var() 2004 anonymous = False 2005 else: 2006 index = None 2007 anonymous = True 2008 2009 this = self._parse_index(index=index, anonymous=anonymous) 2010 elif create_token.token_type in self.DB_CREATABLES: 2011 table_parts = self._parse_table_parts( 2012 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2013 ) 2014 2015 # exp.Properties.Location.POST_NAME 2016 self._match(TokenType.COMMA) 2017 extend_props(self._parse_properties(before=True)) 2018 2019 this = self._parse_schema(this=table_parts) 2020 2021 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2022 extend_props(self._parse_properties()) 2023 2024 has_alias = self._match(TokenType.ALIAS) 2025 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2026 # exp.Properties.Location.POST_ALIAS 2027 extend_props(self._parse_properties()) 2028 2029 if create_token.token_type == TokenType.SEQUENCE: 2030 expression = self._parse_types() 2031 extend_props(self._parse_properties()) 2032 else: 2033 expression = self._parse_ddl_select() 2034 2035 # Some dialects also support using a table as an alias instead of a SELECT. 2036 # Here we fall back to this as an alternative.
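# Illustrative sketch (not part of the source, assuming Teradata-style CTAS):
# CREATE TABLE t2 AS t1 WITH DATA is one such form, where t1 is a table
# reference rather than a subquery:
#
#     import sqlglot
#     tree = sqlglot.parse_one("CREATE TABLE t2 AS t1 WITH DATA", read="teradata")
#     # tree.expression should be an exp.Table rather than an exp.Select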
2037 if not expression and has_alias: 2038 expression = self._try_parse(self._parse_table_parts) 2039 2040 if create_token.token_type == TokenType.TABLE: 2041 # exp.Properties.Location.POST_EXPRESSION 2042 extend_props(self._parse_properties()) 2043 2044 indexes = [] 2045 while True: 2046 index = self._parse_index() 2047 2048 # exp.Properties.Location.POST_INDEX 2049 extend_props(self._parse_properties()) 2050 if not index: 2051 break 2052 else: 2053 self._match(TokenType.COMMA) 2054 indexes.append(index) 2055 elif create_token.token_type == TokenType.VIEW: 2056 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2057 no_schema_binding = True 2058 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2059 extend_props(self._parse_properties()) 2060 2061 shallow = self._match_text_seq("SHALLOW") 2062 2063 if self._match_texts(self.CLONE_KEYWORDS): 2064 copy = self._prev.text.lower() == "copy" 2065 clone = self.expression( 2066 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2067 ) 2068 2069 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2070 return self._parse_as_command(start) 2071 2072 create_kind_text = create_token.text.upper() 2073 return self.expression( 2074 exp.Create, 2075 this=this, 2076 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2077 replace=replace, 2078 refresh=refresh, 2079 unique=unique, 2080 expression=expression, 2081 exists=exists, 2082 properties=properties, 2083 indexes=indexes, 2084 no_schema_binding=no_schema_binding, 2085 begin=begin, 2086 end=end, 2087 clone=clone, 2088 concurrently=concurrently, 2089 clustered=clustered, 2090 ) 2091 2092 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2093 seq = exp.SequenceProperties() 2094 2095 options = [] 2096 index = self._index 2097 2098 while self._curr: 2099 self._match(TokenType.COMMA) 2100 if self._match_text_seq("INCREMENT"): 2101 self._match_text_seq("BY") 2102 self._match_text_seq("=") 2103 seq.set("increment", self._parse_term()) 2104 elif self._match_text_seq("MINVALUE"): 2105 seq.set("minvalue", self._parse_term()) 2106 elif self._match_text_seq("MAXVALUE"): 2107 seq.set("maxvalue", self._parse_term()) 2108 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2109 self._match_text_seq("=") 2110 seq.set("start", self._parse_term()) 2111 elif self._match_text_seq("CACHE"): 2112 # T-SQL allows empty CACHE which is initialized dynamically 2113 seq.set("cache", self._parse_number() or True) 2114 elif self._match_text_seq("OWNED", "BY"): 2115 # "OWNED BY NONE" is the default 2116 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2117 else: 2118 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2119 if opt: 2120 options.append(opt) 2121 else: 2122 break 2123 2124 seq.set("options", options if options else None) 2125 return None if self._index == index else seq 2126 2127 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2128 # only used for teradata currently 2129 self._match(TokenType.COMMA) 2130 2131 kwargs = { 2132 "no": self._match_text_seq("NO"), 2133 "dual": self._match_text_seq("DUAL"), 2134 "before": self._match_text_seq("BEFORE"), 2135 "default": self._match_text_seq("DEFAULT"), 2136 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2137 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2138 "after": self._match_text_seq("AFTER"), 2139 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2140 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2141 } 2142 2143 if self._match_texts(self.PROPERTY_PARSERS): 2144 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2145 try: 2146 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2147 except TypeError: 2148 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2149 2150 return None 2151 2152 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2153 return self._parse_wrapped_csv(self._parse_property) 2154 2155 def _parse_property(self) -> t.Optional[exp.Expression]: 2156 if self._match_texts(self.PROPERTY_PARSERS): 2157 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2158 2159 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2160 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2161 2162 if self._match_text_seq("COMPOUND", "SORTKEY"): 2163 return self._parse_sortkey(compound=True) 2164 2165 if self._match_text_seq("SQL", "SECURITY"): 2166 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2167 2168 index = self._index 2169 key = self._parse_column() 2170 2171 if not self._match(TokenType.EQ): 2172 self._retreat(index) 2173 return self._parse_sequence_properties() 2174 2175 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2176 if isinstance(key, exp.Column): 2177 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2178 2179 value = self._parse_bitwise() or self._parse_var(any_token=True) 2180 2181 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2182 if isinstance(value, exp.Column): 2183 value = exp.var(value.name) 2184 2185 return self.expression(exp.Property, this=key, value=value) 2186 2187 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2188 if self._match_text_seq("BY"): 2189 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2190 2191 self._match(TokenType.ALIAS) 2192 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2193 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2194 2195 return self.expression( 2196 exp.FileFormatProperty, 2197 this=( 2198 self.expression( 2199 exp.InputOutputFormat, 2200 input_format=input_format, 2201 output_format=output_format, 2202 ) 2203 if input_format or output_format 2204 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2205 ), 2206 ) 2207 2208 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2209 field = self._parse_field() 2210 if isinstance(field, exp.Identifier) and not field.quoted: 2211 field = exp.var(field) 2212 2213 return field 2214 2215 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2216 self._match(TokenType.EQ) 2217 self._match(TokenType.ALIAS) 2218 2219 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2220 2221 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2222 properties = [] 2223 while True: 2224 if before: 2225 prop = self._parse_property_before() 2226 else: 2227 prop = self._parse_property() 2228 if not prop: 2229 break 2230 for p in ensure_list(prop): 2231 properties.append(p) 2232 2233 if properties: 2234 return self.expression(exp.Properties, expressions=properties) 2235 2236 return None 2237 2238 
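# --------------------------------------------------------------------------
# Illustrative sketch (not part of the source): properties gathered by
# _parse_properties surface as an exp.Properties node on DDL expressions.
# For example, with the MySQL dialect (table and engine names are arbitrary):
#
#     import sqlglot
#     ddl = sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB", read="mysql")
#     props = ddl.args.get("properties")   # exp.Properties
#     for prop in props.expressions:
#         print(type(prop).__name__)       # e.g. EngineProperty
# --------------------------------------------------------------------------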
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2239 return self.expression( 2240 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2241 ) 2242 2243 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2244 if self._match_texts(("DEFINER", "INVOKER")): 2245 security_specifier = self._prev.text.upper() 2246 return self.expression(exp.SecurityProperty, this=security_specifier) 2247 return None 2248 2249 def _parse_settings_property(self) -> exp.SettingsProperty: 2250 return self.expression( 2251 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2252 ) 2253 2254 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2255 if self._index >= 2: 2256 pre_volatile_token = self._tokens[self._index - 2] 2257 else: 2258 pre_volatile_token = None 2259 2260 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2261 return exp.VolatileProperty() 2262 2263 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2264 2265 def _parse_retention_period(self) -> exp.Var: 2266 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2267 number = self._parse_number() 2268 number_str = f"{number} " if number else "" 2269 unit = self._parse_var(any_token=True) 2270 return exp.var(f"{number_str}{unit}") 2271 2272 def _parse_system_versioning_property( 2273 self, with_: bool = False 2274 ) -> exp.WithSystemVersioningProperty: 2275 self._match(TokenType.EQ) 2276 prop = self.expression( 2277 exp.WithSystemVersioningProperty, 2278 **{ # type: ignore 2279 "on": True, 2280 "with": with_, 2281 }, 2282 ) 2283 2284 if self._match_text_seq("OFF"): 2285 prop.set("on", False) 2286 return prop 2287 2288 self._match(TokenType.ON) 2289 if self._match(TokenType.L_PAREN): 2290 while self._curr and not self._match(TokenType.R_PAREN): 2291 if self._match_text_seq("HISTORY_TABLE", "="): 2292 prop.set("this", self._parse_table_parts()) 2293 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2294 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2295 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2296 prop.set("retention_period", self._parse_retention_period()) 2297 2298 self._match(TokenType.COMMA) 2299 2300 return prop 2301 2302 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2303 self._match(TokenType.EQ) 2304 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2305 prop = self.expression(exp.DataDeletionProperty, on=on) 2306 2307 if self._match(TokenType.L_PAREN): 2308 while self._curr and not self._match(TokenType.R_PAREN): 2309 if self._match_text_seq("FILTER_COLUMN", "="): 2310 prop.set("filter_column", self._parse_column()) 2311 elif self._match_text_seq("RETENTION_PERIOD", "="): 2312 prop.set("retention_period", self._parse_retention_period()) 2313 2314 self._match(TokenType.COMMA) 2315 2316 return prop 2317 2318 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2319 kind = "HASH" 2320 expressions: t.Optional[t.List[exp.Expression]] = None 2321 if self._match_text_seq("BY", "HASH"): 2322 expressions = self._parse_wrapped_csv(self._parse_id_var) 2323 elif self._match_text_seq("BY", "RANDOM"): 2324 kind = "RANDOM" 2325 2326 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2327 buckets: t.Optional[exp.Expression] = None 2328 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2329 buckets = 
self._parse_number() 2330 2331 return self.expression( 2332 exp.DistributedByProperty, 2333 expressions=expressions, 2334 kind=kind, 2335 buckets=buckets, 2336 order=self._parse_order(), 2337 ) 2338 2339 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2340 self._match_text_seq("KEY") 2341 expressions = self._parse_wrapped_id_vars() 2342 return self.expression(expr_type, expressions=expressions) 2343 2344 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2345 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2346 prop = self._parse_system_versioning_property(with_=True) 2347 self._match_r_paren() 2348 return prop 2349 2350 if self._match(TokenType.L_PAREN, advance=False): 2351 return self._parse_wrapped_properties() 2352 2353 if self._match_text_seq("JOURNAL"): 2354 return self._parse_withjournaltable() 2355 2356 if self._match_texts(self.VIEW_ATTRIBUTES): 2357 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2358 2359 if self._match_text_seq("DATA"): 2360 return self._parse_withdata(no=False) 2361 elif self._match_text_seq("NO", "DATA"): 2362 return self._parse_withdata(no=True) 2363 2364 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2365 return self._parse_serde_properties(with_=True) 2366 2367 if self._match(TokenType.SCHEMA): 2368 return self.expression( 2369 exp.WithSchemaBindingProperty, 2370 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2371 ) 2372 2373 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2374 return self.expression( 2375 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2376 ) 2377 2378 if not self._next: 2379 return None 2380 2381 return self._parse_withisolatedloading() 2382 2383 def _parse_procedure_option(self) -> exp.Expression | None: 2384 if self._match_text_seq("EXECUTE", "AS"): 2385 return self.expression( 2386 exp.ExecuteAsProperty, 2387 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2388 or self._parse_string(), 2389 ) 2390 2391 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2392 2393 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2394 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2395 self._match(TokenType.EQ) 2396 2397 user = self._parse_id_var() 2398 self._match(TokenType.PARAMETER) 2399 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2400 2401 if not user or not host: 2402 return None 2403 2404 return exp.DefinerProperty(this=f"{user}@{host}") 2405 2406 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2407 self._match(TokenType.TABLE) 2408 self._match(TokenType.EQ) 2409 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2410 2411 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2412 return self.expression(exp.LogProperty, no=no) 2413 2414 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2415 return self.expression(exp.JournalProperty, **kwargs) 2416 2417 def _parse_checksum(self) -> exp.ChecksumProperty: 2418 self._match(TokenType.EQ) 2419 2420 on = None 2421 if self._match(TokenType.ON): 2422 on = True 2423 elif self._match_text_seq("OFF"): 2424 on = False 2425 2426 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2427 2428 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2429 return self.expression( 2430 exp.Cluster, 2431 expressions=( 2432 
self._parse_wrapped_csv(self._parse_ordered) 2433 if wrapped 2434 else self._parse_csv(self._parse_ordered) 2435 ), 2436 ) 2437 2438 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2439 self._match_text_seq("BY") 2440 2441 self._match_l_paren() 2442 expressions = self._parse_csv(self._parse_column) 2443 self._match_r_paren() 2444 2445 if self._match_text_seq("SORTED", "BY"): 2446 self._match_l_paren() 2447 sorted_by = self._parse_csv(self._parse_ordered) 2448 self._match_r_paren() 2449 else: 2450 sorted_by = None 2451 2452 self._match(TokenType.INTO) 2453 buckets = self._parse_number() 2454 self._match_text_seq("BUCKETS") 2455 2456 return self.expression( 2457 exp.ClusteredByProperty, 2458 expressions=expressions, 2459 sorted_by=sorted_by, 2460 buckets=buckets, 2461 ) 2462 2463 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2464 if not self._match_text_seq("GRANTS"): 2465 self._retreat(self._index - 1) 2466 return None 2467 2468 return self.expression(exp.CopyGrantsProperty) 2469 2470 def _parse_freespace(self) -> exp.FreespaceProperty: 2471 self._match(TokenType.EQ) 2472 return self.expression( 2473 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2474 ) 2475 2476 def _parse_mergeblockratio( 2477 self, no: bool = False, default: bool = False 2478 ) -> exp.MergeBlockRatioProperty: 2479 if self._match(TokenType.EQ): 2480 return self.expression( 2481 exp.MergeBlockRatioProperty, 2482 this=self._parse_number(), 2483 percent=self._match(TokenType.PERCENT), 2484 ) 2485 2486 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2487 2488 def _parse_datablocksize( 2489 self, 2490 default: t.Optional[bool] = None, 2491 minimum: t.Optional[bool] = None, 2492 maximum: t.Optional[bool] = None, 2493 ) -> exp.DataBlocksizeProperty: 2494 self._match(TokenType.EQ) 2495 size = self._parse_number() 2496 2497 units = None 2498 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2499 units = self._prev.text 2500 2501 return self.expression( 2502 exp.DataBlocksizeProperty, 2503 size=size, 2504 units=units, 2505 default=default, 2506 minimum=minimum, 2507 maximum=maximum, 2508 ) 2509 2510 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2511 self._match(TokenType.EQ) 2512 always = self._match_text_seq("ALWAYS") 2513 manual = self._match_text_seq("MANUAL") 2514 never = self._match_text_seq("NEVER") 2515 default = self._match_text_seq("DEFAULT") 2516 2517 autotemp = None 2518 if self._match_text_seq("AUTOTEMP"): 2519 autotemp = self._parse_schema() 2520 2521 return self.expression( 2522 exp.BlockCompressionProperty, 2523 always=always, 2524 manual=manual, 2525 never=never, 2526 default=default, 2527 autotemp=autotemp, 2528 ) 2529 2530 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2531 index = self._index 2532 no = self._match_text_seq("NO") 2533 concurrent = self._match_text_seq("CONCURRENT") 2534 2535 if not self._match_text_seq("ISOLATED", "LOADING"): 2536 self._retreat(index) 2537 return None 2538 2539 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2540 return self.expression( 2541 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2542 ) 2543 2544 def _parse_locking(self) -> exp.LockingProperty: 2545 if self._match(TokenType.TABLE): 2546 kind = "TABLE" 2547 elif self._match(TokenType.VIEW): 2548 kind = "VIEW" 2549 elif self._match(TokenType.ROW): 2550 kind = "ROW" 2551 elif 
self._match_text_seq("DATABASE"): 2552 kind = "DATABASE" 2553 else: 2554 kind = None 2555 2556 if kind in ("DATABASE", "TABLE", "VIEW"): 2557 this = self._parse_table_parts() 2558 else: 2559 this = None 2560 2561 if self._match(TokenType.FOR): 2562 for_or_in = "FOR" 2563 elif self._match(TokenType.IN): 2564 for_or_in = "IN" 2565 else: 2566 for_or_in = None 2567 2568 if self._match_text_seq("ACCESS"): 2569 lock_type = "ACCESS" 2570 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2571 lock_type = "EXCLUSIVE" 2572 elif self._match_text_seq("SHARE"): 2573 lock_type = "SHARE" 2574 elif self._match_text_seq("READ"): 2575 lock_type = "READ" 2576 elif self._match_text_seq("WRITE"): 2577 lock_type = "WRITE" 2578 elif self._match_text_seq("CHECKSUM"): 2579 lock_type = "CHECKSUM" 2580 else: 2581 lock_type = None 2582 2583 override = self._match_text_seq("OVERRIDE") 2584 2585 return self.expression( 2586 exp.LockingProperty, 2587 this=this, 2588 kind=kind, 2589 for_or_in=for_or_in, 2590 lock_type=lock_type, 2591 override=override, 2592 ) 2593 2594 def _parse_partition_by(self) -> t.List[exp.Expression]: 2595 if self._match(TokenType.PARTITION_BY): 2596 return self._parse_csv(self._parse_assignment) 2597 return [] 2598 2599 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2600 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2601 if self._match_text_seq("MINVALUE"): 2602 return exp.var("MINVALUE") 2603 if self._match_text_seq("MAXVALUE"): 2604 return exp.var("MAXVALUE") 2605 return self._parse_bitwise() 2606 2607 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2608 expression = None 2609 from_expressions = None 2610 to_expressions = None 2611 2612 if self._match(TokenType.IN): 2613 this = self._parse_wrapped_csv(self._parse_bitwise) 2614 elif self._match(TokenType.FROM): 2615 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2616 self._match_text_seq("TO") 2617 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2618 elif self._match_text_seq("WITH", "(", "MODULUS"): 2619 this = self._parse_number() 2620 self._match_text_seq(",", "REMAINDER") 2621 expression = self._parse_number() 2622 self._match_r_paren() 2623 else: 2624 self.raise_error("Failed to parse partition bound spec.") 2625 2626 return self.expression( 2627 exp.PartitionBoundSpec, 2628 this=this, 2629 expression=expression, 2630 from_expressions=from_expressions, 2631 to_expressions=to_expressions, 2632 ) 2633 2634 # https://www.postgresql.org/docs/current/sql-createtable.html 2635 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2636 if not self._match_text_seq("OF"): 2637 self._retreat(self._index - 1) 2638 return None 2639 2640 this = self._parse_table(schema=True) 2641 2642 if self._match(TokenType.DEFAULT): 2643 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2644 elif self._match_text_seq("FOR", "VALUES"): 2645 expression = self._parse_partition_bound_spec() 2646 else: 2647 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2648 2649 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2650 2651 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2652 self._match(TokenType.EQ) 2653 return self.expression( 2654 exp.PartitionedByProperty, 2655 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2656 ) 2657 2658 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2659 if self._match_text_seq("AND", "STATISTICS"): 2660 
statistics = True 2661 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2662 statistics = False 2663 else: 2664 statistics = None 2665 2666 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2667 2668 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2669 if self._match_text_seq("SQL"): 2670 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2671 return None 2672 2673 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2674 if self._match_text_seq("SQL", "DATA"): 2675 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2676 return None 2677 2678 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2679 if self._match_text_seq("PRIMARY", "INDEX"): 2680 return exp.NoPrimaryIndexProperty() 2681 if self._match_text_seq("SQL"): 2682 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2683 return None 2684 2685 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2686 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2687 return exp.OnCommitProperty() 2688 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2689 return exp.OnCommitProperty(delete=True) 2690 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2691 2692 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2693 if self._match_text_seq("SQL", "DATA"): 2694 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2695 return None 2696 2697 def _parse_distkey(self) -> exp.DistKeyProperty: 2698 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2699 2700 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2701 table = self._parse_table(schema=True) 2702 2703 options = [] 2704 while self._match_texts(("INCLUDING", "EXCLUDING")): 2705 this = self._prev.text.upper() 2706 2707 id_var = self._parse_id_var() 2708 if not id_var: 2709 return None 2710 2711 options.append( 2712 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2713 ) 2714 2715 return self.expression(exp.LikeProperty, this=table, expressions=options) 2716 2717 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2718 return self.expression( 2719 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2720 ) 2721 2722 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2723 self._match(TokenType.EQ) 2724 return self.expression( 2725 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2726 ) 2727 2728 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2729 self._match_text_seq("WITH", "CONNECTION") 2730 return self.expression( 2731 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2732 ) 2733 2734 def _parse_returns(self) -> exp.ReturnsProperty: 2735 value: t.Optional[exp.Expression] 2736 null = None 2737 is_table = self._match(TokenType.TABLE) 2738 2739 if is_table: 2740 if self._match(TokenType.LT): 2741 value = self.expression( 2742 exp.Schema, 2743 this="TABLE", 2744 expressions=self._parse_csv(self._parse_struct_types), 2745 ) 2746 if not self._match(TokenType.GT): 2747 self.raise_error("Expecting >") 2748 else: 2749 value = self._parse_schema(exp.var("TABLE")) 2750 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2751 null = True 2752 value = None 2753 else: 2754 value = self._parse_types() 2755 2756 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2757 2758 def _parse_describe(self) -> exp.Describe: 2759 kind = self._match_set(self.CREATABLES) and self._prev.text 2760 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2761 if self._match(TokenType.DOT): 2762 style = None 2763 self._retreat(self._index - 2) 2764 2765 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2766 2767 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2768 this = self._parse_statement() 2769 else: 2770 this = self._parse_table(schema=True) 2771 2772 properties = self._parse_properties() 2773 expressions = properties.expressions if properties else None 2774 partition = self._parse_partition() 2775 return self.expression( 2776 exp.Describe, 2777 this=this, 2778 style=style, 2779 kind=kind, 2780 expressions=expressions, 2781 partition=partition, 2782 format=format, 2783 ) 2784 2785 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2786 kind = self._prev.text.upper() 2787 expressions = [] 2788 2789 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2790 if self._match(TokenType.WHEN): 2791 expression = self._parse_disjunction() 2792 self._match(TokenType.THEN) 2793 else: 2794 expression = None 2795 2796 else_ = self._match(TokenType.ELSE) 2797 2798 if not self._match(TokenType.INTO): 2799 return None 2800 2801 return self.expression( 2802 exp.ConditionalInsert, 2803 this=self.expression( 2804 exp.Insert, 2805 this=self._parse_table(schema=True), 2806 expression=self._parse_derived_table_values(), 2807 ), 2808 expression=expression, 2809 else_=else_, 2810 ) 2811 2812 expression = parse_conditional_insert() 2813 while expression is not None: 2814 expressions.append(expression) 2815 expression = parse_conditional_insert() 2816 2817 return self.expression( 2818 exp.MultitableInserts, 2819 kind=kind, 2820 comments=comments, 2821 expressions=expressions, 2822 source=self._parse_table(), 2823 ) 2824 2825 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2826 comments = [] 2827 hint = self._parse_hint() 2828 overwrite = self._match(TokenType.OVERWRITE) 2829 ignore = self._match(TokenType.IGNORE) 2830 local = self._match_text_seq("LOCAL") 2831 alternative = None 2832 is_function = None 2833 2834 if self._match_text_seq("DIRECTORY"): 2835 this: t.Optional[exp.Expression] = self.expression( 2836 exp.Directory, 2837 this=self._parse_var_or_string(), 2838 local=local, 2839 row_format=self._parse_row_format(match_row=True), 2840 ) 2841 else: 2842 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2843 comments += ensure_list(self._prev_comments) 2844 return self._parse_multitable_inserts(comments) 2845 2846 if self._match(TokenType.OR): 2847 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2848 2849 self._match(TokenType.INTO) 2850 comments += ensure_list(self._prev_comments) 2851 self._match(TokenType.TABLE) 2852 is_function = self._match(TokenType.FUNCTION) 2853 2854 this = ( 2855 self._parse_table(schema=True, parse_partition=True) 2856 if not is_function 2857 else self._parse_function() 2858 ) 2859 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2860 this.set("alias", self._parse_table_alias()) 2861 2862 returning = self._parse_returning() 2863 2864 return self.expression( 2865 exp.Insert, 2866 comments=comments, 2867 hint=hint, 2868 is_function=is_function, 2869 this=this, 
2870 stored=self._match_text_seq("STORED") and self._parse_stored(), 2871 by_name=self._match_text_seq("BY", "NAME"), 2872 exists=self._parse_exists(), 2873 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2874 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2875 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2876 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2877 conflict=self._parse_on_conflict(), 2878 returning=returning or self._parse_returning(), 2879 overwrite=overwrite, 2880 alternative=alternative, 2881 ignore=ignore, 2882 source=self._match(TokenType.TABLE) and self._parse_table(), 2883 ) 2884 2885 def _parse_kill(self) -> exp.Kill: 2886 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2887 2888 return self.expression( 2889 exp.Kill, 2890 this=self._parse_primary(), 2891 kind=kind, 2892 ) 2893 2894 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2895 conflict = self._match_text_seq("ON", "CONFLICT") 2896 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2897 2898 if not conflict and not duplicate: 2899 return None 2900 2901 conflict_keys = None 2902 constraint = None 2903 2904 if conflict: 2905 if self._match_text_seq("ON", "CONSTRAINT"): 2906 constraint = self._parse_id_var() 2907 elif self._match(TokenType.L_PAREN): 2908 conflict_keys = self._parse_csv(self._parse_id_var) 2909 self._match_r_paren() 2910 2911 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2912 if self._prev.token_type == TokenType.UPDATE: 2913 self._match(TokenType.SET) 2914 expressions = self._parse_csv(self._parse_equality) 2915 else: 2916 expressions = None 2917 2918 return self.expression( 2919 exp.OnConflict, 2920 duplicate=duplicate, 2921 expressions=expressions, 2922 action=action, 2923 conflict_keys=conflict_keys, 2924 constraint=constraint, 2925 where=self._parse_where(), 2926 ) 2927 2928 def _parse_returning(self) -> t.Optional[exp.Returning]: 2929 if not self._match(TokenType.RETURNING): 2930 return None 2931 return self.expression( 2932 exp.Returning, 2933 expressions=self._parse_csv(self._parse_expression), 2934 into=self._match(TokenType.INTO) and self._parse_table_part(), 2935 ) 2936 2937 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2938 if not self._match(TokenType.FORMAT): 2939 return None 2940 return self._parse_row_format() 2941 2942 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2943 index = self._index 2944 with_ = with_ or self._match_text_seq("WITH") 2945 2946 if not self._match(TokenType.SERDE_PROPERTIES): 2947 self._retreat(index) 2948 return None 2949 return self.expression( 2950 exp.SerdeProperties, 2951 **{ # type: ignore 2952 "expressions": self._parse_wrapped_properties(), 2953 "with": with_, 2954 }, 2955 ) 2956 2957 def _parse_row_format( 2958 self, match_row: bool = False 2959 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2960 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2961 return None 2962 2963 if self._match_text_seq("SERDE"): 2964 this = self._parse_string() 2965 2966 serde_properties = self._parse_serde_properties() 2967 2968 return self.expression( 2969 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2970 ) 2971 2972 self._match_text_seq("DELIMITED") 2973 2974 kwargs = {} 2975 2976 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2977 kwargs["fields"] = self._parse_string() 2978 if self._match_text_seq("ESCAPED", "BY"): 2979 kwargs["escaped"] = self._parse_string() 2980 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2981 kwargs["collection_items"] = self._parse_string() 2982 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2983 kwargs["map_keys"] = self._parse_string() 2984 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2985 kwargs["lines"] = self._parse_string() 2986 if self._match_text_seq("NULL", "DEFINED", "AS"): 2987 kwargs["null"] = self._parse_string() 2988 2989 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2990 2991 def _parse_load(self) -> exp.LoadData | exp.Command: 2992 if self._match_text_seq("DATA"): 2993 local = self._match_text_seq("LOCAL") 2994 self._match_text_seq("INPATH") 2995 inpath = self._parse_string() 2996 overwrite = self._match(TokenType.OVERWRITE) 2997 self._match_pair(TokenType.INTO, TokenType.TABLE) 2998 2999 return self.expression( 3000 exp.LoadData, 3001 this=self._parse_table(schema=True), 3002 local=local, 3003 overwrite=overwrite, 3004 inpath=inpath, 3005 partition=self._parse_partition(), 3006 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3007 serde=self._match_text_seq("SERDE") and self._parse_string(), 3008 ) 3009 return self._parse_as_command(self._prev) 3010 3011 def _parse_delete(self) -> exp.Delete: 3012 # This handles MySQL's "Multiple-Table Syntax" 3013 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3014 tables = None 3015 if not self._match(TokenType.FROM, advance=False): 3016 tables = self._parse_csv(self._parse_table) or None 3017 3018 returning = self._parse_returning() 3019 3020 return self.expression( 3021 exp.Delete, 3022 tables=tables, 3023 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3024 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3025 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3026 where=self._parse_where(), 3027 returning=returning or self._parse_returning(), 3028 limit=self._parse_limit(), 3029 ) 3030 3031 def _parse_update(self) -> exp.Update: 3032 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3033 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3034 returning = self._parse_returning() 3035 return self.expression( 3036 exp.Update, 3037 **{ # type: ignore 3038 "this": this, 3039 "expressions": expressions, 3040 "from": self._parse_from(joins=True), 3041 "where": self._parse_where(), 3042 "returning": returning or self._parse_returning(), 3043 "order": self._parse_order(), 3044 "limit": self._parse_limit(), 3045 }, 3046 ) 3047 3048 def _parse_use(self) -> exp.Use: 3049 return self.expression( 3050 exp.Use, 3051 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3052 this=self._parse_table(schema=False), 3053 ) 3054 3055 def _parse_uncache(self) -> exp.Uncache: 3056 if not self._match(TokenType.TABLE): 3057 self.raise_error("Expecting TABLE after UNCACHE") 3058 3059 return self.expression( 3060 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3061 ) 3062 3063 def _parse_cache(self) -> exp.Cache: 3064 lazy = self._match_text_seq("LAZY") 3065 self._match(TokenType.TABLE) 3066 table = self._parse_table(schema=True) 3067 3068 options = [] 3069 if self._match_text_seq("OPTIONS"): 3070 self._match_l_paren() 3071 k = 
self._parse_string() 3072 self._match(TokenType.EQ) 3073 v = self._parse_string() 3074 options = [k, v] 3075 self._match_r_paren() 3076 3077 self._match(TokenType.ALIAS) 3078 return self.expression( 3079 exp.Cache, 3080 this=table, 3081 lazy=lazy, 3082 options=options, 3083 expression=self._parse_select(nested=True), 3084 ) 3085 3086 def _parse_partition(self) -> t.Optional[exp.Partition]: 3087 if not self._match_texts(self.PARTITION_KEYWORDS): 3088 return None 3089 3090 return self.expression( 3091 exp.Partition, 3092 subpartition=self._prev.text.upper() == "SUBPARTITION", 3093 expressions=self._parse_wrapped_csv(self._parse_assignment), 3094 ) 3095 3096 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3097 def _parse_value_expression() -> t.Optional[exp.Expression]: 3098 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3099 return exp.var(self._prev.text.upper()) 3100 return self._parse_expression() 3101 3102 if self._match(TokenType.L_PAREN): 3103 expressions = self._parse_csv(_parse_value_expression) 3104 self._match_r_paren() 3105 return self.expression(exp.Tuple, expressions=expressions) 3106 3107 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3108 expression = self._parse_expression() 3109 if expression: 3110 return self.expression(exp.Tuple, expressions=[expression]) 3111 return None 3112 3113 def _parse_projections(self) -> t.List[exp.Expression]: 3114 return self._parse_expressions() 3115 3116 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3117 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3118 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3119 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3120 ) 3121 elif self._match(TokenType.FROM): 3122 from_ = self._parse_from(skip_from_token=True) 3123 # Support parentheses for duckdb FROM-first syntax 3124 select = self._parse_select() 3125 if select: 3126 select.set("from", from_) 3127 this = select 3128 else: 3129 this = exp.select("*").from_(t.cast(exp.From, from_)) 3130 else: 3131 this = ( 3132 self._parse_table() 3133 if table 3134 else self._parse_select(nested=True, parse_set_operation=False) 3135 ) 3136 3137 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3138 # in case a modifier (e.g.
join) is following 3139 if table and isinstance(this, exp.Values) and this.alias: 3140 alias = this.args["alias"].pop() 3141 this = exp.Table(this=this, alias=alias) 3142 3143 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3144 3145 return this 3146 3147 def _parse_select( 3148 self, 3149 nested: bool = False, 3150 table: bool = False, 3151 parse_subquery_alias: bool = True, 3152 parse_set_operation: bool = True, 3153 ) -> t.Optional[exp.Expression]: 3154 cte = self._parse_with() 3155 3156 if cte: 3157 this = self._parse_statement() 3158 3159 if not this: 3160 self.raise_error("Failed to parse any statement following CTE") 3161 return cte 3162 3163 if "with" in this.arg_types: 3164 this.set("with", cte) 3165 else: 3166 self.raise_error(f"{this.key} does not support CTE") 3167 this = cte 3168 3169 return this 3170 3171 # duckdb supports leading with FROM x 3172 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3173 3174 if self._match(TokenType.SELECT): 3175 comments = self._prev_comments 3176 3177 hint = self._parse_hint() 3178 3179 if self._next and not self._next.token_type == TokenType.DOT: 3180 all_ = self._match(TokenType.ALL) 3181 distinct = self._match_set(self.DISTINCT_TOKENS) 3182 else: 3183 all_, distinct = None, None 3184 3185 kind = ( 3186 self._match(TokenType.ALIAS) 3187 and self._match_texts(("STRUCT", "VALUE")) 3188 and self._prev.text.upper() 3189 ) 3190 3191 if distinct: 3192 distinct = self.expression( 3193 exp.Distinct, 3194 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3195 ) 3196 3197 if all_ and distinct: 3198 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3199 3200 operation_modifiers = [] 3201 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3202 operation_modifiers.append(exp.var(self._prev.text.upper())) 3203 3204 limit = self._parse_limit(top=True) 3205 projections = self._parse_projections() 3206 3207 this = self.expression( 3208 exp.Select, 3209 kind=kind, 3210 hint=hint, 3211 distinct=distinct, 3212 expressions=projections, 3213 limit=limit, 3214 operation_modifiers=operation_modifiers or None, 3215 ) 3216 this.comments = comments 3217 3218 into = self._parse_into() 3219 if into: 3220 this.set("into", into) 3221 3222 if not from_: 3223 from_ = self._parse_from() 3224 3225 if from_: 3226 this.set("from", from_) 3227 3228 this = self._parse_query_modifiers(this) 3229 elif (table or nested) and self._match(TokenType.L_PAREN): 3230 this = self._parse_wrapped_select(table=table) 3231 3232 # We return early here so that the UNION isn't attached to the subquery by the 3233 # following call to _parse_set_operations, but instead becomes the parent node 3234 self._match_r_paren() 3235 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3236 elif self._match(TokenType.VALUES, advance=False): 3237 this = self._parse_derived_table_values() 3238 elif from_: 3239 this = exp.select("*").from_(from_.this, copy=False) 3240 elif self._match(TokenType.SUMMARIZE): 3241 table = self._match(TokenType.TABLE) 3242 this = self._parse_select() or self._parse_string() or self._parse_table() 3243 return self.expression(exp.Summarize, this=this, table=table) 3244 elif self._match(TokenType.DESCRIBE): 3245 this = self._parse_describe() 3246 elif self._match_text_seq("STREAM"): 3247 this = self._parse_function() 3248 if this: 3249 this = self.expression(exp.Stream, this=this) 3250 else: 3251 self._retreat(self._index - 1) 3252 else: 3253 this = None 
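# Illustrative note (not part of the source): whichever branch above produced
# `this`, set operations are attached last by the return below, e.g.:
#
#     import sqlglot
#     tree = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
#     assert isinstance(tree, sqlglot.exp.Union)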
3254 3255 return self._parse_set_operations(this) if parse_set_operation else this 3256 3257 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3258 self._match_text_seq("SEARCH") 3259 3260 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3261 3262 if not kind: 3263 return None 3264 3265 self._match_text_seq("FIRST", "BY") 3266 3267 return self.expression( 3268 exp.RecursiveWithSearch, 3269 kind=kind, 3270 this=self._parse_id_var(), 3271 expression=self._match_text_seq("SET") and self._parse_id_var(), 3272 using=self._match_text_seq("USING") and self._parse_id_var(), 3273 ) 3274 3275 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3276 if not skip_with_token and not self._match(TokenType.WITH): 3277 return None 3278 3279 comments = self._prev_comments 3280 recursive = self._match(TokenType.RECURSIVE) 3281 3282 last_comments = None 3283 expressions = [] 3284 while True: 3285 cte = self._parse_cte() 3286 if isinstance(cte, exp.CTE): 3287 expressions.append(cte) 3288 if last_comments: 3289 cte.add_comments(last_comments) 3290 3291 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3292 break 3293 else: 3294 self._match(TokenType.WITH) 3295 3296 last_comments = self._prev_comments 3297 3298 return self.expression( 3299 exp.With, 3300 comments=comments, 3301 expressions=expressions, 3302 recursive=recursive, 3303 search=self._parse_recursive_with_search(), 3304 ) 3305 3306 def _parse_cte(self) -> t.Optional[exp.CTE]: 3307 index = self._index 3308 3309 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3310 if not alias or not alias.this: 3311 self.raise_error("Expected CTE to have alias") 3312 3313 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3314 self._retreat(index) 3315 return None 3316 3317 comments = self._prev_comments 3318 3319 if self._match_text_seq("NOT", "MATERIALIZED"): 3320 materialized = False 3321 elif self._match_text_seq("MATERIALIZED"): 3322 materialized = True 3323 else: 3324 materialized = None 3325 3326 cte = self.expression( 3327 exp.CTE, 3328 this=self._parse_wrapped(self._parse_statement), 3329 alias=alias, 3330 materialized=materialized, 3331 comments=comments, 3332 ) 3333 3334 if isinstance(cte.this, exp.Values): 3335 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3336 3337 return cte 3338 3339 def _parse_table_alias( 3340 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3341 ) -> t.Optional[exp.TableAlias]: 3342 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3343 # so this section tries to parse the clause version and if it fails, it treats the token 3344 # as an identifier (alias) 3345 if self._can_parse_limit_or_offset(): 3346 return None 3347 3348 any_token = self._match(TokenType.ALIAS) 3349 alias = ( 3350 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3351 or self._parse_string_as_identifier() 3352 ) 3353 3354 index = self._index 3355 if self._match(TokenType.L_PAREN): 3356 columns = self._parse_csv(self._parse_function_parameter) 3357 self._match_r_paren() if columns else self._retreat(index) 3358 else: 3359 columns = None 3360 3361 if not alias and not columns: 3362 return None 3363 3364 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3365 3366 # We bubble up comments from the Identifier to the TableAlias 3367 if isinstance(alias, exp.Identifier): 3368 
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
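    # Illustrative sketch (added commentary, assuming BigQuery's implicit-unnest
    # behavior): a comma join on a column reference is rewritten to an explicit
    # UNNEST by _implicit_unnests_to_explicit, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT * FROM t, t.tags", read="bigquery")
    #   assert ast.find(exp.Unnest) is not None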
    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
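    # Illustrative sketch (added commentary; Snowflake is one dialect that accepts
    # MATCH_RECOGNIZE, so it is used for the example):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = """
    #       SELECT * FROM t MATCH_RECOGNIZE (
    #           PARTITION BY id ORDER BY ts
    #           PATTERN (a b+)
    #           DEFINE b AS price > 10
    #       )
    #   """
    #   ast = sqlglot.parse_one(sql, read="snowflake")
    #   assert ast.find(exp.MatchRecognize) is not None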
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
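    # Illustrative sketch (added commentary): USING column lists are stored on the
    # resulting exp.Join as plain identifiers, e.g.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)")
    #   join = ast.args["joins"][0]
    #   assert [i.name for i in join.args["using"]] == ["id"]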
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
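    # Illustrative sketch (added commentary): dotted names fill catalog/db/table
    # from right to left, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   assert (tbl.catalog, tbl.db, tbl.name) == ("c", "d", "t")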
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()
        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
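    # Illustrative sketch (added commentary, assuming BigQuery's UNNEST ... WITH
    # OFFSET syntax): the offset alias ends up in the Unnest's "offset" arg, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one(
    #       "SELECT * FROM UNNEST([1, 2]) AS v WITH OFFSET AS pos", read="bigquery"
    #   )
    #   assert ast.find(exp.Unnest).args["offset"].name == "pos"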
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
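    # Illustrative sketch (added commentary): a percentage sample lands in the
    # "percent" arg of exp.TableSample, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
    #   assert ast.find(exp.TableSample).args["percent"] is not None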
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot
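    # Worked example for the column-name generation above (added commentary):
    # for PIVOT(SUM(x) AS total FOR year IN (2000, 2010)), all_fields becomes
    # [["2000", "2010"], ["total"]], and
    #
    #   list(itertools.product(*[["2000", "2010"], ["total"]]))
    #   # -> [("2000", "total"), ("2010", "total")]
    #
    # which yields the columns 2000_total and 2010_total (or total_2000 and
    # total_2010 when PREFIXED_PIVOT_COLUMNS moves the name to the front).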
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
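    # Illustrative sketch (added commentary): ROLLUP/CUBE/GROUPING SETS are
    # collected into the corresponding lists on exp.Group, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #   assert ast.find(exp.Rollup) is not None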
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True
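    # Illustrative sketch (added commentary): explicit NULLS FIRST/LAST always
    # wins over the dialect's NULL_ORDERING default, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "SELECT x FROM t ORDER BY x DESC NULLS FIRST"
    #   ordered = sqlglot.parse_one(sql).find(exp.Ordered)
    #   assert ordered.args["desc"] and ordered.args["nulls_first"]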
        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
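    # Illustrative sketch (added commentary): MySQL's `LIMIT <offset>, <count>`
    # form is handled by the COMMA branch in _parse_limit; _parse_query_modifiers
    # then splits it into separate Limit and Offset nodes, e.g.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT x FROM t LIMIT 2, 10", read="mysql")
    #   assert ast.args["limit"].expression.name == "10"
    #   assert ast.args["offset"].expression.name == "2"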
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4662 "LOCK", "IN", "SHARE", "MODE" 4663 ): 4664 update = False 4665 else: 4666 break 4667 4668 expressions = None 4669 if self._match_text_seq("OF"): 4670 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4671 4672 wait: t.Optional[bool | exp.Expression] = None 4673 if self._match_text_seq("NOWAIT"): 4674 wait = True 4675 elif self._match_text_seq("WAIT"): 4676 wait = self._parse_primary() 4677 elif self._match_text_seq("SKIP", "LOCKED"): 4678 wait = False 4679 4680 locks.append( 4681 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4682 ) 4683 4684 return locks 4685 4686 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4687 start = self._index 4688 _, side_token, kind_token = self._parse_join_parts() 4689 4690 side = side_token.text if side_token else None 4691 kind = kind_token.text if kind_token else None 4692 4693 if not self._match_set(self.SET_OPERATIONS): 4694 self._retreat(start) 4695 return None 4696 4697 token_type = self._prev.token_type 4698 4699 if token_type == TokenType.UNION: 4700 operation: t.Type[exp.SetOperation] = exp.Union 4701 elif token_type == TokenType.EXCEPT: 4702 operation = exp.Except 4703 else: 4704 operation = exp.Intersect 4705 4706 comments = self._prev.comments 4707 4708 if self._match(TokenType.DISTINCT): 4709 distinct: t.Optional[bool] = True 4710 elif self._match(TokenType.ALL): 4711 distinct = False 4712 else: 4713 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4714 if distinct is None: 4715 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4716 4717 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4718 "STRICT", "CORRESPONDING" 4719 ) 4720 if self._match_text_seq("CORRESPONDING"): 4721 by_name = True 4722 if not side and not kind: 4723 kind = "INNER" 4724 4725 on_column_list = None 4726 if by_name and self._match_texts(("ON", "BY")): 4727 on_column_list = self._parse_wrapped_csv(self._parse_column) 4728 4729 expression = self._parse_select(nested=True, parse_set_operation=False) 4730 4731 return self.expression( 4732 operation, 4733 comments=comments, 4734 this=this, 4735 distinct=distinct, 4736 by_name=by_name, 4737 expression=expression, 4738 side=side, 4739 kind=kind, 4740 on=on_column_list, 4741 ) 4742 4743 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4744 while this: 4745 setop = self.parse_set_operation(this) 4746 if not setop: 4747 break 4748 this = setop 4749 4750 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4751 expression = this.expression 4752 4753 if expression: 4754 for arg in self.SET_OP_MODIFIERS: 4755 expr = expression.args.get(arg) 4756 if expr: 4757 this.set(arg, expr.pop()) 4758 4759 return this 4760 4761 def _parse_expression(self) -> t.Optional[exp.Expression]: 4762 return self._parse_alias(self._parse_assignment()) 4763 4764 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4765 this = self._parse_disjunction() 4766 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4767 # This allows us to parse <non-identifier token> := <expr> 4768 this = exp.column( 4769 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4770 ) 4771 4772 while self._match_set(self.ASSIGNMENT): 4773 if isinstance(this, exp.Column) and len(this.parts) == 1: 4774 this = this.this 4775 4776 this = self.expression( 4777 
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
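    # Illustrative sketch (added commentary): IS [NOT] DISTINCT FROM is normalized
    # to null-safe comparison nodes, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT a IS NOT DISTINCT FROM b FROM t")
    #   assert ast.find(exp.NullSafeEQ) is not None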
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
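    # Illustrative sketch (added commentary): intervals are canonicalized so the
    # quantity is a string literal and the unit a separate var, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   iv = sqlglot.parse_one("SELECT INTERVAL '5 day'").find(exp.Interval)
    #   assert iv.this.name == "5" and iv.text("unit") == "DAY"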
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
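    # Added commentary: binary expressions are parsed as a precedence ladder, each
    # method delegating to the next-tighter level:
    #
    #   _parse_assignment -> _parse_disjunction (OR) -> _parse_conjunction (AND)
    #     -> _parse_equality (=, <>) -> _parse_comparison (<, <=, ...)
    #     -> _parse_range (BETWEEN, IN, LIKE, ...) -> _parse_bitwise
    #     -> _parse_term (+, -) -> _parse_factor (*, /) -> _parse_unary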
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type
5140 if type_token == TokenType.PSEUDO_TYPE: 5141 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5142 5143 if type_token == TokenType.OBJECT_IDENTIFIER: 5144 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5145 5146 # https://materialize.com/docs/sql/types/map/ 5147 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5148 key_type = self._parse_types( 5149 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5150 ) 5151 if not self._match(TokenType.FARROW): 5152 self._retreat(index) 5153 return None 5154 5155 value_type = self._parse_types( 5156 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5157 ) 5158 if not self._match(TokenType.R_BRACKET): 5159 self._retreat(index) 5160 return None 5161 5162 return exp.DataType( 5163 this=exp.DataType.Type.MAP, 5164 expressions=[key_type, value_type], 5165 nested=True, 5166 prefix=prefix, 5167 ) 5168 5169 nested = type_token in self.NESTED_TYPE_TOKENS 5170 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5171 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5172 expressions = None 5173 maybe_func = False 5174 5175 if self._match(TokenType.L_PAREN): 5176 if is_struct: 5177 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5178 elif nested: 5179 expressions = self._parse_csv( 5180 lambda: self._parse_types( 5181 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5182 ) 5183 ) 5184 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5185 this = expressions[0] 5186 this.set("nullable", True) 5187 self._match_r_paren() 5188 return this 5189 elif type_token in self.ENUM_TYPE_TOKENS: 5190 expressions = self._parse_csv(self._parse_equality) 5191 elif is_aggregate: 5192 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5193 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5194 ) 5195 if not func_or_ident: 5196 return None 5197 expressions = [func_or_ident] 5198 if self._match(TokenType.COMMA): 5199 expressions.extend( 5200 self._parse_csv( 5201 lambda: self._parse_types( 5202 check_func=check_func, 5203 schema=schema, 5204 allow_identifiers=allow_identifiers, 5205 ) 5206 ) 5207 ) 5208 else: 5209 expressions = self._parse_csv(self._parse_type_size) 5210 5211 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5212 if type_token == TokenType.VECTOR and len(expressions) == 2: 5213 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5214 5215 if not expressions or not self._match(TokenType.R_PAREN): 5216 self._retreat(index) 5217 return None 5218 5219 maybe_func = True 5220 5221 values: t.Optional[t.List[exp.Expression]] = None 5222 5223 if nested and self._match(TokenType.LT): 5224 if is_struct: 5225 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5226 else: 5227 expressions = self._parse_csv( 5228 lambda: self._parse_types( 5229 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5230 ) 5231 ) 5232 5233 if not self._match(TokenType.GT): 5234 self.raise_error("Expecting >") 5235 5236 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5237 values = self._parse_csv(self._parse_assignment) 5238 if not values and is_struct: 5239 values = None 5240 self._retreat(self._index - 1) 5241 else: 5242 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5243 5244 if type_token in self.TIMESTAMPS: 5245 if self._match_text_seq("WITH", "TIME", 
"ZONE"): 5246 maybe_func = False 5247 tz_type = ( 5248 exp.DataType.Type.TIMETZ 5249 if type_token in self.TIMES 5250 else exp.DataType.Type.TIMESTAMPTZ 5251 ) 5252 this = exp.DataType(this=tz_type, expressions=expressions) 5253 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5254 maybe_func = False 5255 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5256 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5257 maybe_func = False 5258 elif type_token == TokenType.INTERVAL: 5259 unit = self._parse_var(upper=True) 5260 if unit: 5261 if self._match_text_seq("TO"): 5262 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5263 5264 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5265 else: 5266 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5267 elif type_token == TokenType.VOID: 5268 this = exp.DataType(this=exp.DataType.Type.NULL) 5269 5270 if maybe_func and check_func: 5271 index2 = self._index 5272 peek = self._parse_string() 5273 5274 if not peek: 5275 self._retreat(index) 5276 return None 5277 5278 self._retreat(index2) 5279 5280 if not this: 5281 if self._match_text_seq("UNSIGNED"): 5282 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5283 if not unsigned_type_token: 5284 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5285 5286 type_token = unsigned_type_token or type_token 5287 5288 this = exp.DataType( 5289 this=exp.DataType.Type[type_token.value], 5290 expressions=expressions, 5291 nested=nested, 5292 prefix=prefix, 5293 ) 5294 5295 # Empty arrays/structs are allowed 5296 if values is not None: 5297 cls = exp.Struct if is_struct else exp.Array 5298 this = exp.cast(cls(expressions=values), this, copy=False) 5299 5300 elif expressions: 5301 this.set("expressions", expressions) 5302 5303 # https://materialize.com/docs/sql/types/list/#type-name 5304 while self._match(TokenType.LIST): 5305 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5306 5307 index = self._index 5308 5309 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5310 matched_array = self._match(TokenType.ARRAY) 5311 5312 while self._curr: 5313 datatype_token = self._prev.token_type 5314 matched_l_bracket = self._match(TokenType.L_BRACKET) 5315 5316 if (not matched_l_bracket and not matched_array) or ( 5317 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5318 ): 5319 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5320 # not to be confused with the fixed size array parsing 5321 break 5322 5323 matched_array = False 5324 values = self._parse_csv(self._parse_assignment) or None 5325 if ( 5326 values 5327 and not schema 5328 and ( 5329 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5330 ) 5331 ): 5332 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5333 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5334 self._retreat(index) 5335 break 5336 5337 this = exp.DataType( 5338 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5339 ) 5340 self._match(TokenType.R_BRACKET) 5341 5342 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5343 converter = self.TYPE_CONVERTERS.get(this.this) 5344 if converter: 5345 this = converter(t.cast(exp.DataType, this)) 5346 5347 return this 5348 5349 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5350 index = self._index 5351 5352 if ( 5353 self._curr 5354 and self._next 5355 and self._curr.token_type in self.TYPE_TOKENS 5356 and self._next.token_type in self.TYPE_TOKENS 5357 ): 5358 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5359 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5360 this = self._parse_id_var() 5361 else: 5362 this = ( 5363 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5364 or self._parse_id_var() 5365 ) 5366 5367 self._match(TokenType.COLON) 5368 5369 if ( 5370 type_required 5371 and not isinstance(this, exp.DataType) 5372 and not self._match_set(self.TYPE_TOKENS, advance=False) 5373 ): 5374 self._retreat(index) 5375 return self._parse_types() 5376 5377 return self._parse_column_def(this) 5378 5379 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5380 if not self._match_text_seq("AT", "TIME", "ZONE"): 5381 return this 5382 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5383 5384 def _parse_column(self) -> t.Optional[exp.Expression]: 5385 this = self._parse_column_reference() 5386 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5387 5388 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5389 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5390 5391 return column 5392 5393 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5394 this = self._parse_field() 5395 if ( 5396 not this 5397 and self._match(TokenType.VALUES, advance=False) 5398 and self.VALUES_FOLLOWED_BY_PAREN 5399 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5400 ): 5401 this = self._parse_id_var() 5402 5403 if isinstance(this, exp.Identifier): 5404 # We bubble up comments from the Identifier to the Column 5405 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5406 5407 return this 5408 5409 def _parse_colon_as_variant_extract( 5410 self, this: t.Optional[exp.Expression] 5411 ) -> t.Optional[exp.Expression]: 5412 casts = [] 5413 json_path = [] 5414 escape = None 5415 5416 while self._match(TokenType.COLON): 5417 start_index = self._index 5418 5419 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5420 path = self._parse_column_ops( 5421 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5422 ) 5423 5424 # The cast :: operator has a lower precedence than the extraction operator :, so 5425 # we rearrange the AST appropriately to avoid casting the JSON path 5426 while isinstance(path, exp.Cast): 5427 casts.append(path.to) 5428 path = path.this 5429 5430 if casts: 5431 dcolon_offset = next( 5432 i 5433 for i, t in enumerate(self._tokens[start_index:]) 5434 if t.token_type == TokenType.DCOLON 
5435 ) 5436 end_token = self._tokens[start_index + dcolon_offset - 1] 5437 else: 5438 end_token = self._prev 5439 5440 if path: 5441 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5442 # it'll roundtrip to a string literal in GET_PATH 5443 if isinstance(path, exp.Identifier) and path.quoted: 5444 escape = True 5445 5446 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5447 5448 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5449 # Databricks transforms it back to the colon/dot notation 5450 if json_path: 5451 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5452 5453 if json_path_expr: 5454 json_path_expr.set("escape", escape) 5455 5456 this = self.expression( 5457 exp.JSONExtract, 5458 this=this, 5459 expression=json_path_expr, 5460 variant_extract=True, 5461 ) 5462 5463 while casts: 5464 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5465 5466 return this 5467 5468 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5469 return self._parse_types() 5470 5471 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5472 this = self._parse_bracket(this) 5473 5474 while self._match_set(self.COLUMN_OPERATORS): 5475 op_token = self._prev.token_type 5476 op = self.COLUMN_OPERATORS.get(op_token) 5477 5478 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5479 field = self._parse_dcolon() 5480 if not field: 5481 self.raise_error("Expected type") 5482 elif op and self._curr: 5483 field = self._parse_column_reference() or self._parse_bracket() 5484 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5485 field = self._parse_column_ops(field) 5486 else: 5487 field = self._parse_field(any_token=True, anonymous_func=True) 5488 5489 if isinstance(field, (exp.Func, exp.Window)) and this: 5490 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5491 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5492 this = exp.replace_tree( 5493 this, 5494 lambda n: ( 5495 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5496 if n.table 5497 else n.this 5498 ) 5499 if isinstance(n, exp.Column) 5500 else n, 5501 ) 5502 5503 if op: 5504 this = op(self, this, field) 5505 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5506 this = self.expression( 5507 exp.Column, 5508 comments=this.comments, 5509 this=field, 5510 table=this.this, 5511 db=this.args.get("table"), 5512 catalog=this.args.get("db"), 5513 ) 5514 elif isinstance(field, exp.Window): 5515 # Move the exp.Dot's to the window's function 5516 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5517 field.set("this", window_func) 5518 this = field 5519 else: 5520 this = self.expression(exp.Dot, this=this, expression=field) 5521 5522 if field and field.comments: 5523 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5524 5525 this = self._parse_bracket(this) 5526 5527 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5528 5529 def _parse_primary(self) -> t.Optional[exp.Expression]: 5530 if self._match_set(self.PRIMARY_PARSERS): 5531 token_type = self._prev.token_type 5532 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5533 5534 if token_type == TokenType.STRING: 5535 expressions = [primary] 5536 while self._match(TokenType.STRING): 5537 expressions.append(exp.Literal.string(self._prev.text)) 5538 5539 if len(expressions) > 1: 5540 return self.expression(exp.Concat, expressions=expressions) 5541 5542 return primary 5543 5544 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5545 return exp.Literal.number(f"0.{self._prev.text}") 5546 5547 if self._match(TokenType.L_PAREN): 5548 comments = self._prev_comments 5549 query = self._parse_select() 5550 5551 if query: 5552 expressions = [query] 5553 else: 5554 expressions = self._parse_expressions() 5555 5556 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5557 5558 if not this and self._match(TokenType.R_PAREN, advance=False): 5559 this = self.expression(exp.Tuple) 5560 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5561 this = self._parse_subquery(this=this, parse_alias=False) 5562 elif isinstance(this, exp.Subquery): 5563 this = self._parse_subquery( 5564 this=self._parse_set_operations(this), parse_alias=False 5565 ) 5566 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5567 this = self.expression(exp.Tuple, expressions=expressions) 5568 else: 5569 this = self.expression(exp.Paren, this=this) 5570 5571 if this: 5572 this.add_comments(comments) 5573 5574 self._match_r_paren(expression=this) 5575 return this 5576 5577 return None 5578 5579 def _parse_field( 5580 self, 5581 any_token: bool = False, 5582 tokens: t.Optional[t.Collection[TokenType]] = None, 5583 anonymous_func: bool = False, 5584 ) -> t.Optional[exp.Expression]: 5585 if anonymous_func: 5586 field = ( 5587 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5588 or self._parse_primary() 5589 ) 5590 else: 5591 field = self._parse_primary() or self._parse_function( 5592 anonymous=anonymous_func, any_token=any_token 5593 ) 5594 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5595 5596 def _parse_function( 5597 self, 5598 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5599 anonymous: bool = False, 5600 optional_parens: 
bool = True, 5601 any_token: bool = False, 5602 ) -> t.Optional[exp.Expression]: 5603 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5604 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5605 fn_syntax = False 5606 if ( 5607 self._match(TokenType.L_BRACE, advance=False) 5608 and self._next 5609 and self._next.text.upper() == "FN" 5610 ): 5611 self._advance(2) 5612 fn_syntax = True 5613 5614 func = self._parse_function_call( 5615 functions=functions, 5616 anonymous=anonymous, 5617 optional_parens=optional_parens, 5618 any_token=any_token, 5619 ) 5620 5621 if fn_syntax: 5622 self._match(TokenType.R_BRACE) 5623 5624 return func 5625 5626 def _parse_function_call( 5627 self, 5628 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5629 anonymous: bool = False, 5630 optional_parens: bool = True, 5631 any_token: bool = False, 5632 ) -> t.Optional[exp.Expression]: 5633 if not self._curr: 5634 return None 5635 5636 comments = self._curr.comments 5637 token = self._curr 5638 token_type = self._curr.token_type 5639 this = self._curr.text 5640 upper = this.upper() 5641 5642 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5643 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5644 self._advance() 5645 return self._parse_window(parser(self)) 5646 5647 if not self._next or self._next.token_type != TokenType.L_PAREN: 5648 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5649 self._advance() 5650 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5651 5652 return None 5653 5654 if any_token: 5655 if token_type in self.RESERVED_TOKENS: 5656 return None 5657 elif token_type not in self.FUNC_TOKENS: 5658 return None 5659 5660 self._advance(2) 5661 5662 parser = self.FUNCTION_PARSERS.get(upper) 5663 if parser and not anonymous: 5664 this = parser(self) 5665 else: 5666 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5667 5668 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5669 this = self.expression( 5670 subquery_predicate, comments=comments, this=self._parse_select() 5671 ) 5672 self._match_r_paren() 5673 return this 5674 5675 if functions is None: 5676 functions = self.FUNCTIONS 5677 5678 function = functions.get(upper) 5679 known_function = function and not anonymous 5680 5681 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5682 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5683 5684 post_func_comments = self._curr and self._curr.comments 5685 if known_function and post_func_comments: 5686 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5687 # call we'll construct it as exp.Anonymous, even if it's "known" 5688 if any( 5689 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5690 for comment in post_func_comments 5691 ): 5692 known_function = False 5693 5694 if alias and known_function: 5695 args = self._kv_to_prop_eq(args) 5696 5697 if known_function: 5698 func_builder = t.cast(t.Callable, function) 5699 5700 if "dialect" in func_builder.__code__.co_varnames: 5701 func = func_builder(args, dialect=self.dialect) 5702 else: 5703 func = func_builder(args) 5704 5705 func = self.validate_expression(func, args) 5706 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5707 func.meta["name"] = this 5708 5709 this = func 5710 else: 5711 if token_type == TokenType.IDENTIFIER: 5712 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5713 this = 
self.expression(exp.Anonymous, this=this, expressions=args) 5714 5715 if isinstance(this, exp.Expression): 5716 this.add_comments(comments) 5717 5718 self._match_r_paren(this) 5719 return self._parse_window(this) 5720 5721 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5722 return expression 5723 5724 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5725 transformed = [] 5726 5727 for index, e in enumerate(expressions): 5728 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5729 if isinstance(e, exp.Alias): 5730 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5731 5732 if not isinstance(e, exp.PropertyEQ): 5733 e = self.expression( 5734 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5735 ) 5736 5737 if isinstance(e.this, exp.Column): 5738 e.this.replace(e.this.this) 5739 else: 5740 e = self._to_prop_eq(e, index) 5741 5742 transformed.append(e) 5743 5744 return transformed 5745 5746 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5747 return self._parse_statement() 5748 5749 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5750 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5751 5752 def _parse_user_defined_function( 5753 self, kind: t.Optional[TokenType] = None 5754 ) -> t.Optional[exp.Expression]: 5755 this = self._parse_table_parts(schema=True) 5756 5757 if not self._match(TokenType.L_PAREN): 5758 return this 5759 5760 expressions = self._parse_csv(self._parse_function_parameter) 5761 self._match_r_paren() 5762 return self.expression( 5763 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5764 ) 5765 5766 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5767 literal = self._parse_primary() 5768 if literal: 5769 return self.expression(exp.Introducer, this=token.text, expression=literal) 5770 5771 return self._identifier_expression(token) 5772 5773 def _parse_session_parameter(self) -> exp.SessionParameter: 5774 kind = None 5775 this = self._parse_id_var() or self._parse_primary() 5776 5777 if this and self._match(TokenType.DOT): 5778 kind = this.name 5779 this = self._parse_var() or self._parse_primary() 5780 5781 return self.expression(exp.SessionParameter, this=this, kind=kind) 5782 5783 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5784 return self._parse_id_var() 5785 5786 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5787 index = self._index 5788 5789 if self._match(TokenType.L_PAREN): 5790 expressions = t.cast( 5791 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5792 ) 5793 5794 if not self._match(TokenType.R_PAREN): 5795 self._retreat(index) 5796 else: 5797 expressions = [self._parse_lambda_arg()] 5798 5799 if self._match_set(self.LAMBDAS): 5800 return self.LAMBDAS[self._prev.token_type](self, expressions) 5801 5802 self._retreat(index) 5803 5804 this: t.Optional[exp.Expression] 5805 5806 if self._match(TokenType.DISTINCT): 5807 this = self.expression( 5808 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5809 ) 5810 else: 5811 this = self._parse_select_or_expression(alias=alias) 5812 5813 return self._parse_limit( 5814 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5815 ) 5816 5817 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5818 index = 
self._index 5819 if not self._match(TokenType.L_PAREN): 5820 return this 5821 5822 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5823 # expr can be of both types 5824 if self._match_set(self.SELECT_START_TOKENS): 5825 self._retreat(index) 5826 return this 5827 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5828 self._match_r_paren() 5829 return self.expression(exp.Schema, this=this, expressions=args) 5830 5831 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5832 return self._parse_column_def(self._parse_field(any_token=True)) 5833 5834 def _parse_column_def( 5835 self, this: t.Optional[exp.Expression], computed_column: bool = True 5836 ) -> t.Optional[exp.Expression]: 5837 # column defs are not really columns, they're identifiers 5838 if isinstance(this, exp.Column): 5839 this = this.this 5840 5841 if not computed_column: 5842 self._match(TokenType.ALIAS) 5843 5844 kind = self._parse_types(schema=True) 5845 5846 if self._match_text_seq("FOR", "ORDINALITY"): 5847 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5848 5849 constraints: t.List[exp.Expression] = [] 5850 5851 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5852 ("ALIAS", "MATERIALIZED") 5853 ): 5854 persisted = self._prev.text.upper() == "MATERIALIZED" 5855 constraint_kind = exp.ComputedColumnConstraint( 5856 this=self._parse_assignment(), 5857 persisted=persisted or self._match_text_seq("PERSISTED"), 5858 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5859 ) 5860 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5861 elif ( 5862 kind 5863 and self._match(TokenType.ALIAS, advance=False) 5864 and ( 5865 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5866 or (self._next and self._next.token_type == TokenType.L_PAREN) 5867 ) 5868 ): 5869 self._advance() 5870 constraints.append( 5871 self.expression( 5872 exp.ColumnConstraint, 5873 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5874 ) 5875 ) 5876 5877 while True: 5878 constraint = self._parse_column_constraint() 5879 if not constraint: 5880 break 5881 constraints.append(constraint) 5882 5883 if not kind and not constraints: 5884 return this 5885 5886 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5887 5888 def _parse_auto_increment( 5889 self, 5890 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5891 start = None 5892 increment = None 5893 5894 if self._match(TokenType.L_PAREN, advance=False): 5895 args = self._parse_wrapped_csv(self._parse_bitwise) 5896 start = seq_get(args, 0) 5897 increment = seq_get(args, 1) 5898 elif self._match_text_seq("START"): 5899 start = self._parse_bitwise() 5900 self._match_text_seq("INCREMENT") 5901 increment = self._parse_bitwise() 5902 5903 if start and increment: 5904 return exp.GeneratedAsIdentityColumnConstraint( 5905 start=start, increment=increment, this=False 5906 ) 5907 5908 return exp.AutoIncrementColumnConstraint() 5909 5910 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5911 if not self._match_text_seq("REFRESH"): 5912 self._retreat(self._index - 1) 5913 return None 5914 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5915 5916 def _parse_compress(self) -> exp.CompressColumnConstraint: 5917 if self._match(TokenType.L_PAREN, advance=False): 5918 return self.expression( 5919 exp.CompressColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_bitwise) 5920 ) 5921 5922 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5923 5924 def _parse_generated_as_identity( 5925 self, 5926 ) -> ( 5927 exp.GeneratedAsIdentityColumnConstraint 5928 | exp.ComputedColumnConstraint 5929 | exp.GeneratedAsRowColumnConstraint 5930 ): 5931 if self._match_text_seq("BY", "DEFAULT"): 5932 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5933 this = self.expression( 5934 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5935 ) 5936 else: 5937 self._match_text_seq("ALWAYS") 5938 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5939 5940 self._match(TokenType.ALIAS) 5941 5942 if self._match_text_seq("ROW"): 5943 start = self._match_text_seq("START") 5944 if not start: 5945 self._match(TokenType.END) 5946 hidden = self._match_text_seq("HIDDEN") 5947 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5948 5949 identity = self._match_text_seq("IDENTITY") 5950 5951 if self._match(TokenType.L_PAREN): 5952 if self._match(TokenType.START_WITH): 5953 this.set("start", self._parse_bitwise()) 5954 if self._match_text_seq("INCREMENT", "BY"): 5955 this.set("increment", self._parse_bitwise()) 5956 if self._match_text_seq("MINVALUE"): 5957 this.set("minvalue", self._parse_bitwise()) 5958 if self._match_text_seq("MAXVALUE"): 5959 this.set("maxvalue", self._parse_bitwise()) 5960 5961 if self._match_text_seq("CYCLE"): 5962 this.set("cycle", True) 5963 elif self._match_text_seq("NO", "CYCLE"): 5964 this.set("cycle", False) 5965 5966 if not identity: 5967 this.set("expression", self._parse_range()) 5968 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5969 args = self._parse_csv(self._parse_bitwise) 5970 this.set("start", seq_get(args, 0)) 5971 this.set("increment", seq_get(args, 1)) 5972 5973 self._match_r_paren() 5974 5975 return this 5976 5977 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5978 self._match_text_seq("LENGTH") 5979 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5980 5981 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5982 if self._match_text_seq("NULL"): 5983 return self.expression(exp.NotNullColumnConstraint) 5984 if self._match_text_seq("CASESPECIFIC"): 5985 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5986 if self._match_text_seq("FOR", "REPLICATION"): 5987 return self.expression(exp.NotForReplicationColumnConstraint) 5988 5989 # Unconsume the `NOT` token 5990 self._retreat(self._index - 1) 5991 return None 5992 5993 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5994 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5995 5996 procedure_option_follows = ( 5997 self._match(TokenType.WITH, advance=False) 5998 and self._next 5999 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6000 ) 6001 6002 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6003 return self.expression( 6004 exp.ColumnConstraint, 6005 this=this, 6006 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6007 ) 6008 6009 return this 6010 6011 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6012 if not self._match(TokenType.CONSTRAINT): 6013 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6014 6015 return self.expression( 6016 exp.Constraint, 6017 this=self._parse_id_var(), 6018 
expressions=self._parse_unnamed_constraints(), 6019 ) 6020 6021 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6022 constraints = [] 6023 while True: 6024 constraint = self._parse_unnamed_constraint() or self._parse_function() 6025 if not constraint: 6026 break 6027 constraints.append(constraint) 6028 6029 return constraints 6030 6031 def _parse_unnamed_constraint( 6032 self, constraints: t.Optional[t.Collection[str]] = None 6033 ) -> t.Optional[exp.Expression]: 6034 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6035 constraints or self.CONSTRAINT_PARSERS 6036 ): 6037 return None 6038 6039 constraint = self._prev.text.upper() 6040 if constraint not in self.CONSTRAINT_PARSERS: 6041 self.raise_error(f"No parser found for schema constraint {constraint}.") 6042 6043 return self.CONSTRAINT_PARSERS[constraint](self) 6044 6045 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6046 return self._parse_id_var(any_token=False) 6047 6048 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6049 self._match_text_seq("KEY") 6050 return self.expression( 6051 exp.UniqueColumnConstraint, 6052 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6053 this=self._parse_schema(self._parse_unique_key()), 6054 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6055 on_conflict=self._parse_on_conflict(), 6056 options=self._parse_key_constraint_options(), 6057 ) 6058 6059 def _parse_key_constraint_options(self) -> t.List[str]: 6060 options = [] 6061 while True: 6062 if not self._curr: 6063 break 6064 6065 if self._match(TokenType.ON): 6066 action = None 6067 on = self._advance_any() and self._prev.text 6068 6069 if self._match_text_seq("NO", "ACTION"): 6070 action = "NO ACTION" 6071 elif self._match_text_seq("CASCADE"): 6072 action = "CASCADE" 6073 elif self._match_text_seq("RESTRICT"): 6074 action = "RESTRICT" 6075 elif self._match_pair(TokenType.SET, TokenType.NULL): 6076 action = "SET NULL" 6077 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6078 action = "SET DEFAULT" 6079 else: 6080 self.raise_error("Invalid key constraint") 6081 6082 options.append(f"ON {on} {action}") 6083 else: 6084 var = self._parse_var_from_options( 6085 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6086 ) 6087 if not var: 6088 break 6089 options.append(var.name) 6090 6091 return options 6092 6093 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6094 if match and not self._match(TokenType.REFERENCES): 6095 return None 6096 6097 expressions = None 6098 this = self._parse_table(schema=True) 6099 options = self._parse_key_constraint_options() 6100 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6101 6102 def _parse_foreign_key(self) -> exp.ForeignKey: 6103 expressions = ( 6104 self._parse_wrapped_id_vars() 6105 if not self._match(TokenType.REFERENCES, advance=False) 6106 else None 6107 ) 6108 reference = self._parse_references() 6109 on_options = {} 6110 6111 while self._match(TokenType.ON): 6112 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6113 self.raise_error("Expected DELETE or UPDATE") 6114 6115 kind = self._prev.text.lower() 6116 6117 if self._match_text_seq("NO", "ACTION"): 6118 action = "NO ACTION" 6119 elif self._match(TokenType.SET): 6120 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6121 action = "SET " + self._prev.text.upper() 6122 else: 6123 self._advance() 6124 action = self._prev.text.upper() 6125 6126 on_options[kind] = 
action
6127
6128 return self.expression(
6129 exp.ForeignKey,
6130 expressions=expressions,
6131 reference=reference,
6132 options=self._parse_key_constraint_options(),
6133 **on_options, # type: ignore
6134 )
6135
6136 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
6137 return self._parse_ordered() or self._parse_field()
6138
6139 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
6140 if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
6141 self._retreat(self._index - 1)
6142 return None
6143
6144 id_vars = self._parse_wrapped_id_vars()
6145 return self.expression(
6146 exp.PeriodForSystemTimeConstraint,
6147 this=seq_get(id_vars, 0),
6148 expression=seq_get(id_vars, 1),
6149 )
6150
6151 def _parse_primary_key(
6152 self, wrapped_optional: bool = False, in_props: bool = False
6153 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
6154 desc = (
6155 self._match_set((TokenType.ASC, TokenType.DESC))
6156 and self._prev.token_type == TokenType.DESC
6157 )
6158
6159 if not in_props and not self._match(TokenType.L_PAREN, advance=False):
6160 return self.expression(
6161 exp.PrimaryKeyColumnConstraint,
6162 desc=desc,
6163 options=self._parse_key_constraint_options(),
6164 )
6165
6166 expressions = self._parse_wrapped_csv(
6167 self._parse_primary_key_part, optional=wrapped_optional
6168 )
6169 options = self._parse_key_constraint_options()
6170 return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
6171
6172 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
6173 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
6174
6175 def _parse_odbc_datetime_literal(self) -> exp.Expression:
6176 """
6177 Parses a datetime literal in ODBC format. We parse the literal into the corresponding
6178 expression type; for example, `{d'yyyy-mm-dd'}` is parsed into a `Date` node, exactly
6179 as `DATE('yyyy-mm-dd')` would be.
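
Illustrative note (assuming the standard ODBC markers d/t/ts are registered in
ODBC_DATETIME_LITERALS): `{t 'hh:mm:ss'}` and `{ts 'yyyy-mm-dd hh:mm:ss'}` are
handled the same way, producing `Time` and `Timestamp` nodes respectively.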
6180 6181 Reference: 6182 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6183 """ 6184 self._match(TokenType.VAR) 6185 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6186 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6187 if not self._match(TokenType.R_BRACE): 6188 self.raise_error("Expected }") 6189 return expression 6190 6191 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6192 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6193 return this 6194 6195 bracket_kind = self._prev.token_type 6196 if ( 6197 bracket_kind == TokenType.L_BRACE 6198 and self._curr 6199 and self._curr.token_type == TokenType.VAR 6200 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6201 ): 6202 return self._parse_odbc_datetime_literal() 6203 6204 expressions = self._parse_csv( 6205 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6206 ) 6207 6208 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6209 self.raise_error("Expected ]") 6210 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6211 self.raise_error("Expected }") 6212 6213 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6214 if bracket_kind == TokenType.L_BRACE: 6215 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6216 elif not this: 6217 this = build_array_constructor( 6218 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6219 ) 6220 else: 6221 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6222 if constructor_type: 6223 return build_array_constructor( 6224 constructor_type, 6225 args=expressions, 6226 bracket_kind=bracket_kind, 6227 dialect=self.dialect, 6228 ) 6229 6230 expressions = apply_index_offset( 6231 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6232 ) 6233 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6234 6235 self._add_comments(this) 6236 return self._parse_bracket(this) 6237 6238 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6239 if self._match(TokenType.COLON): 6240 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6241 return this 6242 6243 def _parse_case(self) -> t.Optional[exp.Expression]: 6244 ifs = [] 6245 default = None 6246 6247 comments = self._prev_comments 6248 expression = self._parse_assignment() 6249 6250 while self._match(TokenType.WHEN): 6251 this = self._parse_assignment() 6252 self._match(TokenType.THEN) 6253 then = self._parse_assignment() 6254 ifs.append(self.expression(exp.If, this=this, true=then)) 6255 6256 if self._match(TokenType.ELSE): 6257 default = self._parse_assignment() 6258 6259 if not self._match(TokenType.END): 6260 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6261 default = exp.column("interval") 6262 else: 6263 self.raise_error("Expected END after CASE", self._prev) 6264 6265 return self.expression( 6266 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6267 ) 6268 6269 def _parse_if(self) -> t.Optional[exp.Expression]: 6270 if self._match(TokenType.L_PAREN): 6271 args = self._parse_csv( 6272 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6273 ) 6274 this = self.validate_expression(exp.If.from_arg_list(args), args) 6275 self._match_r_paren() 6276 
else: 6277 index = self._index - 1 6278 6279 if self.NO_PAREN_IF_COMMANDS and index == 0: 6280 return self._parse_as_command(self._prev) 6281 6282 condition = self._parse_assignment() 6283 6284 if not condition: 6285 self._retreat(index) 6286 return None 6287 6288 self._match(TokenType.THEN) 6289 true = self._parse_assignment() 6290 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6291 self._match(TokenType.END) 6292 this = self.expression(exp.If, this=condition, true=true, false=false) 6293 6294 return this 6295 6296 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6297 if not self._match_text_seq("VALUE", "FOR"): 6298 self._retreat(self._index - 1) 6299 return None 6300 6301 return self.expression( 6302 exp.NextValueFor, 6303 this=self._parse_column(), 6304 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6305 ) 6306 6307 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6308 this = self._parse_function() or self._parse_var_or_string(upper=True) 6309 6310 if self._match(TokenType.FROM): 6311 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6312 6313 if not self._match(TokenType.COMMA): 6314 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6315 6316 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6317 6318 def _parse_gap_fill(self) -> exp.GapFill: 6319 self._match(TokenType.TABLE) 6320 this = self._parse_table() 6321 6322 self._match(TokenType.COMMA) 6323 args = [this, *self._parse_csv(self._parse_lambda)] 6324 6325 gap_fill = exp.GapFill.from_arg_list(args) 6326 return self.validate_expression(gap_fill, args) 6327 6328 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6329 this = self._parse_assignment() 6330 6331 if not self._match(TokenType.ALIAS): 6332 if self._match(TokenType.COMMA): 6333 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6334 6335 self.raise_error("Expected AS after CAST") 6336 6337 fmt = None 6338 to = self._parse_types() 6339 6340 default = self._match(TokenType.DEFAULT) 6341 if default: 6342 default = self._parse_bitwise() 6343 self._match_text_seq("ON", "CONVERSION", "ERROR") 6344 6345 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6346 fmt_string = self._parse_string() 6347 fmt = self._parse_at_time_zone(fmt_string) 6348 6349 if not to: 6350 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6351 if to.this in exp.DataType.TEMPORAL_TYPES: 6352 this = self.expression( 6353 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6354 this=this, 6355 format=exp.Literal.string( 6356 format_time( 6357 fmt_string.this if fmt_string else "", 6358 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6359 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6360 ) 6361 ), 6362 safe=safe, 6363 ) 6364 6365 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6366 this.set("zone", fmt.args["zone"]) 6367 return this 6368 elif not to: 6369 self.raise_error("Expected TYPE after CAST") 6370 elif isinstance(to, exp.Identifier): 6371 to = exp.DataType.build(to.name, udt=True) 6372 elif to.this == exp.DataType.Type.CHAR: 6373 if self._match(TokenType.CHARACTER_SET): 6374 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6375 6376 return self.expression( 6377 exp.Cast if strict else exp.TryCast, 6378 this=this, 6379 to=to, 6380 format=fmt, 6381 safe=safe, 6382 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6383 default=default, 6384 ) 6385 6386 def _parse_string_agg(self) -> exp.GroupConcat: 6387 if self._match(TokenType.DISTINCT): 6388 args: t.List[t.Optional[exp.Expression]] = [ 6389 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6390 ] 6391 if self._match(TokenType.COMMA): 6392 args.extend(self._parse_csv(self._parse_assignment)) 6393 else: 6394 args = self._parse_csv(self._parse_assignment) # type: ignore 6395 6396 if self._match_text_seq("ON", "OVERFLOW"): 6397 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6398 if self._match_text_seq("ERROR"): 6399 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6400 else: 6401 self._match_text_seq("TRUNCATE") 6402 on_overflow = self.expression( 6403 exp.OverflowTruncateBehavior, 6404 this=self._parse_string(), 6405 with_count=( 6406 self._match_text_seq("WITH", "COUNT") 6407 or not self._match_text_seq("WITHOUT", "COUNT") 6408 ), 6409 ) 6410 else: 6411 on_overflow = None 6412 6413 index = self._index 6414 if not self._match(TokenType.R_PAREN) and args: 6415 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6416 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6417 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6418 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6419 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6420 6421 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6422 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6423 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
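# For example (illustrative), PostgreSQL's STRING_AGG(x, ',' ORDER BY y) and Trino's
# LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) both produce a GroupConcat whose `this`
# carries the ORDER BY, so either surface syntax can be generated from the same AST.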
6424 if not self._match_text_seq("WITHIN", "GROUP"): 6425 self._retreat(index) 6426 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6427 6428 # The corresponding match_r_paren will be called in parse_function (caller) 6429 self._match_l_paren() 6430 6431 return self.expression( 6432 exp.GroupConcat, 6433 this=self._parse_order(this=seq_get(args, 0)), 6434 separator=seq_get(args, 1), 6435 on_overflow=on_overflow, 6436 ) 6437 6438 def _parse_convert( 6439 self, strict: bool, safe: t.Optional[bool] = None 6440 ) -> t.Optional[exp.Expression]: 6441 this = self._parse_bitwise() 6442 6443 if self._match(TokenType.USING): 6444 to: t.Optional[exp.Expression] = self.expression( 6445 exp.CharacterSet, this=self._parse_var() 6446 ) 6447 elif self._match(TokenType.COMMA): 6448 to = self._parse_types() 6449 else: 6450 to = None 6451 6452 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6453 6454 def _parse_xml_table(self) -> exp.XMLTable: 6455 namespaces = None 6456 passing = None 6457 columns = None 6458 6459 if self._match_text_seq("XMLNAMESPACES", "("): 6460 namespaces = self._parse_xml_namespace() 6461 self._match_text_seq(")", ",") 6462 6463 this = self._parse_string() 6464 6465 if self._match_text_seq("PASSING"): 6466 # The BY VALUE keywords are optional and are provided for semantic clarity 6467 self._match_text_seq("BY", "VALUE") 6468 passing = self._parse_csv(self._parse_column) 6469 6470 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6471 6472 if self._match_text_seq("COLUMNS"): 6473 columns = self._parse_csv(self._parse_field_def) 6474 6475 return self.expression( 6476 exp.XMLTable, 6477 this=this, 6478 namespaces=namespaces, 6479 passing=passing, 6480 columns=columns, 6481 by_ref=by_ref, 6482 ) 6483 6484 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6485 namespaces = [] 6486 6487 while True: 6488 if self._match(TokenType.DEFAULT): 6489 uri = self._parse_string() 6490 else: 6491 uri = self._parse_alias(self._parse_string()) 6492 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6493 if not self._match(TokenType.COMMA): 6494 break 6495 6496 return namespaces 6497 6498 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6499 """ 6500 There are generally two variants of the DECODE function: 6501 6502 - DECODE(bin, charset) 6503 - DECODE(expression, search, result [, search, result] ... [, default]) 6504 6505 The second variant will always be parsed into a CASE expression. Note that NULL 6506 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6507 instead of relying on pattern matching. 
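
A minimal illustration of the second variant (values are hypothetical):

    DECODE(x, 1, 'a', NULL, 'b', 'c')
    --> CASE WHEN x = 1 THEN 'a' WHEN x IS NULL THEN 'b' ELSE 'c' END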
6508 """
6509 args = self._parse_csv(self._parse_assignment)
6510
6511 if len(args) < 3:
6512 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
6513
6514 expression, *expressions = args
6515 if not expression:
6516 return None
6517
6518 ifs = []
6519 for search, result in zip(expressions[::2], expressions[1::2]):
6520 if not search or not result:
6521 return None
6522
6523 if isinstance(search, exp.Literal):
6524 ifs.append(
6525 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
6526 )
6527 elif isinstance(search, exp.Null):
6528 ifs.append(
6529 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
6530 )
6531 else:
6532 cond = exp.or_(
6533 exp.EQ(this=expression.copy(), expression=search),
6534 exp.and_(
6535 exp.Is(this=expression.copy(), expression=exp.Null()),
6536 exp.Is(this=search.copy(), expression=exp.Null()),
6537 copy=False,
6538 ),
6539 copy=False,
6540 )
6541 ifs.append(exp.If(this=cond, true=result))
6542
6543 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
6544
6545 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
6546 self._match_text_seq("KEY")
6547 key = self._parse_column()
6548 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
6549 self._match_text_seq("VALUE")
6550 value = self._parse_bitwise()
6551
6552 if not key and not value:
6553 return None
6554 return self.expression(exp.JSONKeyValue, this=key, expression=value)
6555
6556 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
6557 if not this or not self._match_text_seq("FORMAT", "JSON"):
6558 return this
6559
6560 return self.expression(exp.FormatJson, this=this)
6561
6562 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
6563 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
6564 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
6565 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
6566 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
6567 else:
6568 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
6569 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
6570
6571 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)
6572
6573 if not empty and not error and not null:
6574 return None
6575
6576 return self.expression(
6577 exp.OnCondition,
6578 empty=empty,
6579 error=error,
6580 null=null,
6581 )
6582
6583 def _parse_on_handling(
6584 self, on: str, *values: str
6585 ) -> t.Optional[str] | t.Optional[exp.Expression]:
6586 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
6587 for value in values:
6588 if self._match_text_seq(value, "ON", on):
6589 return f"{value} ON {on}"
6590
6591 index = self._index
6592 if self._match(TokenType.DEFAULT):
6593 default_value = self._parse_bitwise()
6594 if self._match_text_seq("ON", on):
6595 return default_value
6596
6597 self._retreat(index)
6598
6599 return None
6600
6601 @t.overload
6602 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...
6603
6604 @t.overload
6605 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
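# The overloads above encode the dispatch in the implementation below: agg=False yields
# exp.JSONObject (JSON_OBJECT) and agg=True yields exp.JSONObjectAgg (JSON_OBJECTAGG).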
6606 6607 def _parse_json_object(self, agg=False): 6608 star = self._parse_star() 6609 expressions = ( 6610 [star] 6611 if star 6612 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6613 ) 6614 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6615 6616 unique_keys = None 6617 if self._match_text_seq("WITH", "UNIQUE"): 6618 unique_keys = True 6619 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6620 unique_keys = False 6621 6622 self._match_text_seq("KEYS") 6623 6624 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6625 self._parse_type() 6626 ) 6627 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6628 6629 return self.expression( 6630 exp.JSONObjectAgg if agg else exp.JSONObject, 6631 expressions=expressions, 6632 null_handling=null_handling, 6633 unique_keys=unique_keys, 6634 return_type=return_type, 6635 encoding=encoding, 6636 ) 6637 6638 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6639 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6640 if not self._match_text_seq("NESTED"): 6641 this = self._parse_id_var() 6642 kind = self._parse_types(allow_identifiers=False) 6643 nested = None 6644 else: 6645 this = None 6646 kind = None 6647 nested = True 6648 6649 path = self._match_text_seq("PATH") and self._parse_string() 6650 nested_schema = nested and self._parse_json_schema() 6651 6652 return self.expression( 6653 exp.JSONColumnDef, 6654 this=this, 6655 kind=kind, 6656 path=path, 6657 nested_schema=nested_schema, 6658 ) 6659 6660 def _parse_json_schema(self) -> exp.JSONSchema: 6661 self._match_text_seq("COLUMNS") 6662 return self.expression( 6663 exp.JSONSchema, 6664 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6665 ) 6666 6667 def _parse_json_table(self) -> exp.JSONTable: 6668 this = self._parse_format_json(self._parse_bitwise()) 6669 path = self._match(TokenType.COMMA) and self._parse_string() 6670 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6671 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6672 schema = self._parse_json_schema() 6673 6674 return exp.JSONTable( 6675 this=this, 6676 schema=schema, 6677 path=path, 6678 error_handling=error_handling, 6679 empty_handling=empty_handling, 6680 ) 6681 6682 def _parse_match_against(self) -> exp.MatchAgainst: 6683 expressions = self._parse_csv(self._parse_column) 6684 6685 self._match_text_seq(")", "AGAINST", "(") 6686 6687 this = self._parse_string() 6688 6689 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6690 modifier = "IN NATURAL LANGUAGE MODE" 6691 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6692 modifier = f"{modifier} WITH QUERY EXPANSION" 6693 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6694 modifier = "IN BOOLEAN MODE" 6695 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6696 modifier = "WITH QUERY EXPANSION" 6697 else: 6698 modifier = None 6699 6700 return self.expression( 6701 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6702 ) 6703 6704 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6705 def _parse_open_json(self) -> exp.OpenJSON: 6706 this = self._parse_bitwise() 6707 path = self._match(TokenType.COMMA) and self._parse_string() 6708 6709 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6710 this = self._parse_field(any_token=True) 6711 kind = self._parse_types() 6712 path = 
self._parse_string() 6713 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6714 6715 return self.expression( 6716 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6717 ) 6718 6719 expressions = None 6720 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6721 self._match_l_paren() 6722 expressions = self._parse_csv(_parse_open_json_column_def) 6723 6724 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6725 6726 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6727 args = self._parse_csv(self._parse_bitwise) 6728 6729 if self._match(TokenType.IN): 6730 return self.expression( 6731 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6732 ) 6733 6734 if haystack_first: 6735 haystack = seq_get(args, 0) 6736 needle = seq_get(args, 1) 6737 else: 6738 haystack = seq_get(args, 1) 6739 needle = seq_get(args, 0) 6740 6741 return self.expression( 6742 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6743 ) 6744 6745 def _parse_predict(self) -> exp.Predict: 6746 self._match_text_seq("MODEL") 6747 this = self._parse_table() 6748 6749 self._match(TokenType.COMMA) 6750 self._match_text_seq("TABLE") 6751 6752 return self.expression( 6753 exp.Predict, 6754 this=this, 6755 expression=self._parse_table(), 6756 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6757 ) 6758 6759 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6760 args = self._parse_csv(self._parse_table) 6761 return exp.JoinHint(this=func_name.upper(), expressions=args) 6762 6763 def _parse_substring(self) -> exp.Substring: 6764 # Postgres supports the form: substring(string [from int] [for int]) 6765 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6766 6767 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6768 6769 if self._match(TokenType.FROM): 6770 args.append(self._parse_bitwise()) 6771 if self._match(TokenType.FOR): 6772 if len(args) == 1: 6773 args.append(exp.Literal.number(1)) 6774 args.append(self._parse_bitwise()) 6775 6776 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6777 6778 def _parse_trim(self) -> exp.Trim: 6779 # https://www.w3resource.com/sql/character-functions/trim.php 6780 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6781 6782 position = None 6783 collation = None 6784 expression = None 6785 6786 if self._match_texts(self.TRIM_TYPES): 6787 position = self._prev.text.upper() 6788 6789 this = self._parse_bitwise() 6790 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6791 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6792 expression = self._parse_bitwise() 6793 6794 if invert_order: 6795 this, expression = expression, this 6796 6797 if self._match(TokenType.COLLATE): 6798 collation = self._parse_bitwise() 6799 6800 return self.expression( 6801 exp.Trim, this=this, position=position, expression=expression, collation=collation 6802 ) 6803 6804 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6805 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6806 6807 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6808 return self._parse_window(self._parse_id_var(), alias=True) 6809 6810 def _parse_respect_or_ignore_nulls( 6811 self, this: t.Optional[exp.Expression] 6812 ) -> t.Optional[exp.Expression]: 6813 if self._match_text_seq("IGNORE", "NULLS"): 
6814 return self.expression(exp.IgnoreNulls, this=this) 6815 if self._match_text_seq("RESPECT", "NULLS"): 6816 return self.expression(exp.RespectNulls, this=this) 6817 return this 6818 6819 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6820 if self._match(TokenType.HAVING): 6821 self._match_texts(("MAX", "MIN")) 6822 max = self._prev.text.upper() != "MIN" 6823 return self.expression( 6824 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6825 ) 6826 6827 return this 6828 6829 def _parse_window( 6830 self, this: t.Optional[exp.Expression], alias: bool = False 6831 ) -> t.Optional[exp.Expression]: 6832 func = this 6833 comments = func.comments if isinstance(func, exp.Expression) else None 6834 6835 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6836 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6837 if self._match_text_seq("WITHIN", "GROUP"): 6838 order = self._parse_wrapped(self._parse_order) 6839 this = self.expression(exp.WithinGroup, this=this, expression=order) 6840 6841 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6842 self._match(TokenType.WHERE) 6843 this = self.expression( 6844 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6845 ) 6846 self._match_r_paren() 6847 6848 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6849 # Some dialects choose to implement and some do not. 6850 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6851 6852 # There is some code above in _parse_lambda that handles 6853 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6854 6855 # The below changes handle 6856 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6857 6858 # Oracle allows both formats 6859 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6860 # and Snowflake chose to do the same for familiarity 6861 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6862 if isinstance(this, exp.AggFunc): 6863 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6864 6865 if ignore_respect and ignore_respect is not this: 6866 ignore_respect.replace(ignore_respect.this) 6867 this = self.expression(ignore_respect.__class__, this=this) 6868 6869 this = self._parse_respect_or_ignore_nulls(this) 6870 6871 # bigquery select from window x AS (partition by ...) 
6872 if alias: 6873 over = None 6874 self._match(TokenType.ALIAS) 6875 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6876 return this 6877 else: 6878 over = self._prev.text.upper() 6879 6880 if comments and isinstance(func, exp.Expression): 6881 func.pop_comments() 6882 6883 if not self._match(TokenType.L_PAREN): 6884 return self.expression( 6885 exp.Window, 6886 comments=comments, 6887 this=this, 6888 alias=self._parse_id_var(False), 6889 over=over, 6890 ) 6891 6892 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6893 6894 first = self._match(TokenType.FIRST) 6895 if self._match_text_seq("LAST"): 6896 first = False 6897 6898 partition, order = self._parse_partition_and_order() 6899 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6900 6901 if kind: 6902 self._match(TokenType.BETWEEN) 6903 start = self._parse_window_spec() 6904 self._match(TokenType.AND) 6905 end = self._parse_window_spec() 6906 exclude = ( 6907 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6908 if self._match_text_seq("EXCLUDE") 6909 else None 6910 ) 6911 6912 spec = self.expression( 6913 exp.WindowSpec, 6914 kind=kind, 6915 start=start["value"], 6916 start_side=start["side"], 6917 end=end["value"], 6918 end_side=end["side"], 6919 exclude=exclude, 6920 ) 6921 else: 6922 spec = None 6923 6924 self._match_r_paren() 6925 6926 window = self.expression( 6927 exp.Window, 6928 comments=comments, 6929 this=this, 6930 partition_by=partition, 6931 order=order, 6932 spec=spec, 6933 alias=window_alias, 6934 over=over, 6935 first=first, 6936 ) 6937 6938 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6939 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6940 return self._parse_window(window, alias=alias) 6941 6942 return window 6943 6944 def _parse_partition_and_order( 6945 self, 6946 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6947 return self._parse_partition_by(), self._parse_order() 6948 6949 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6950 self._match(TokenType.BETWEEN) 6951 6952 return { 6953 "value": ( 6954 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6955 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6956 or self._parse_bitwise() 6957 ), 6958 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6959 } 6960 6961 def _parse_alias( 6962 self, this: t.Optional[exp.Expression], explicit: bool = False 6963 ) -> t.Optional[exp.Expression]: 6964 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6965 # so this section tries to parse the clause version and if it fails, it treats the token 6966 # as an identifier (alias) 6967 if self._can_parse_limit_or_offset(): 6968 return this 6969 6970 any_token = self._match(TokenType.ALIAS) 6971 comments = self._prev_comments or [] 6972 6973 if explicit and not any_token: 6974 return this 6975 6976 if self._match(TokenType.L_PAREN): 6977 aliases = self.expression( 6978 exp.Aliases, 6979 comments=comments, 6980 this=this, 6981 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6982 ) 6983 self._match_r_paren(aliases) 6984 return aliases 6985 6986 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6987 self.STRING_ALIASES and self._parse_string_as_identifier() 6988 ) 6989 6990 if alias: 6991 comments.extend(alias.pop_comments()) 6992 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 6993 column = this.this 6994 6995 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6996 if not this.comments and column and column.comments: 6997 this.comments = column.pop_comments() 6998 6999 return this 7000 7001 def _parse_id_var( 7002 self, 7003 any_token: bool = True, 7004 tokens: t.Optional[t.Collection[TokenType]] = None, 7005 ) -> t.Optional[exp.Expression]: 7006 expression = self._parse_identifier() 7007 if not expression and ( 7008 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7009 ): 7010 quoted = self._prev.token_type == TokenType.STRING 7011 expression = self._identifier_expression(quoted=quoted) 7012 7013 return expression 7014 7015 def _parse_string(self) -> t.Optional[exp.Expression]: 7016 if self._match_set(self.STRING_PARSERS): 7017 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7018 return self._parse_placeholder() 7019 7020 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7021 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7022 if output: 7023 output.update_positions(self._prev) 7024 return output 7025 7026 def _parse_number(self) -> t.Optional[exp.Expression]: 7027 if self._match_set(self.NUMERIC_PARSERS): 7028 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7029 return self._parse_placeholder() 7030 7031 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7032 if self._match(TokenType.IDENTIFIER): 7033 return self._identifier_expression(quoted=True) 7034 return self._parse_placeholder() 7035 7036 def _parse_var( 7037 self, 7038 any_token: bool = False, 7039 tokens: t.Optional[t.Collection[TokenType]] = None, 7040 upper: bool = False, 7041 ) -> t.Optional[exp.Expression]: 7042 if ( 7043 (any_token and self._advance_any()) 7044 or self._match(TokenType.VAR) 7045 or (self._match_set(tokens) if tokens else False) 7046 ): 7047 return self.expression( 7048 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7049 ) 7050 return self._parse_placeholder() 7051 7052 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7053 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7054 self._advance() 7055 return self._prev 7056 return None 7057 7058 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7059 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7060 7061 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7062 return self._parse_primary() or self._parse_var(any_token=True) 7063 7064 def _parse_null(self) -> t.Optional[exp.Expression]: 7065 if self._match_set(self.NULL_TOKENS): 7066 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7067 return self._parse_placeholder() 7068 7069 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7070 if self._match(TokenType.TRUE): 7071 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7072 if self._match(TokenType.FALSE): 7073 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7074 return self._parse_placeholder() 7075 7076 def _parse_star(self) -> t.Optional[exp.Expression]: 7077 if self._match(TokenType.STAR): 7078 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7079 return self._parse_placeholder() 7080 7081 def _parse_parameter(self) -> exp.Parameter: 7082 this = self._parse_identifier() or self._parse_primary_or_var() 7083 return 
self.expression(exp.Parameter, this=this) 7084 7085 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7086 if self._match_set(self.PLACEHOLDER_PARSERS): 7087 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7088 if placeholder: 7089 return placeholder 7090 self._advance(-1) 7091 return None 7092 7093 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7094 if not self._match_texts(keywords): 7095 return None 7096 if self._match(TokenType.L_PAREN, advance=False): 7097 return self._parse_wrapped_csv(self._parse_expression) 7098 7099 expression = self._parse_expression() 7100 return [expression] if expression else None 7101 7102 def _parse_csv( 7103 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7104 ) -> t.List[exp.Expression]: 7105 parse_result = parse_method() 7106 items = [parse_result] if parse_result is not None else [] 7107 7108 while self._match(sep): 7109 self._add_comments(parse_result) 7110 parse_result = parse_method() 7111 if parse_result is not None: 7112 items.append(parse_result) 7113 7114 return items 7115 7116 def _parse_tokens( 7117 self, parse_method: t.Callable, expressions: t.Dict 7118 ) -> t.Optional[exp.Expression]: 7119 this = parse_method() 7120 7121 while self._match_set(expressions): 7122 this = self.expression( 7123 expressions[self._prev.token_type], 7124 this=this, 7125 comments=self._prev_comments, 7126 expression=parse_method(), 7127 ) 7128 7129 return this 7130 7131 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7132 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7133 7134 def _parse_wrapped_csv( 7135 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7136 ) -> t.List[exp.Expression]: 7137 return self._parse_wrapped( 7138 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7139 ) 7140 7141 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7142 wrapped = self._match(TokenType.L_PAREN) 7143 if not wrapped and not optional: 7144 self.raise_error("Expecting (") 7145 parse_result = parse_method() 7146 if wrapped: 7147 self._match_r_paren() 7148 return parse_result 7149 7150 def _parse_expressions(self) -> t.List[exp.Expression]: 7151 return self._parse_csv(self._parse_expression) 7152 7153 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7154 return self._parse_select() or self._parse_set_operations( 7155 self._parse_alias(self._parse_assignment(), explicit=True) 7156 if alias 7157 else self._parse_assignment() 7158 ) 7159 7160 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7161 return self._parse_query_modifiers( 7162 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7163 ) 7164 7165 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7166 this = None 7167 if self._match_texts(self.TRANSACTION_KIND): 7168 this = self._prev.text 7169 7170 self._match_texts(("TRANSACTION", "WORK")) 7171 7172 modes = [] 7173 while True: 7174 mode = [] 7175 while self._match(TokenType.VAR): 7176 mode.append(self._prev.text) 7177 7178 if mode: 7179 modes.append(" ".join(mode)) 7180 if not self._match(TokenType.COMMA): 7181 break 7182 7183 return self.expression(exp.Transaction, this=this, modes=modes) 7184 7185 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7186 chain = None 7187 savepoint = None 7188 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 7189 7190 self._match_texts(("TRANSACTION", "WORK")) 7191 7192 if self._match_text_seq("TO"): 7193 self._match_text_seq("SAVEPOINT") 7194 savepoint = self._parse_id_var() 7195 7196 if self._match(TokenType.AND): 7197 chain = not self._match_text_seq("NO") 7198 self._match_text_seq("CHAIN") 7199 7200 if is_rollback: 7201 return self.expression(exp.Rollback, savepoint=savepoint) 7202 7203 return self.expression(exp.Commit, chain=chain) 7204 7205 def _parse_refresh(self) -> exp.Refresh: 7206 self._match(TokenType.TABLE) 7207 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7208 7209 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7210 if not self._match_text_seq("ADD"): 7211 return None 7212 7213 self._match(TokenType.COLUMN) 7214 exists_column = self._parse_exists(not_=True) 7215 expression = self._parse_field_def() 7216 7217 if expression: 7218 expression.set("exists", exists_column) 7219 7220 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7221 if self._match_texts(("FIRST", "AFTER")): 7222 position = self._prev.text 7223 column_position = self.expression( 7224 exp.ColumnPosition, this=self._parse_column(), position=position 7225 ) 7226 expression.set("position", column_position) 7227 7228 return expression 7229 7230 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7231 drop = self._match(TokenType.DROP) and self._parse_drop() 7232 if drop and not isinstance(drop, exp.Command): 7233 drop.set("kind", drop.args.get("kind", "COLUMN")) 7234 return drop 7235 7236 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7237 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7238 return self.expression( 7239 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7240 ) 7241 7242 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7243 index = self._index - 1 7244 7245 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7246 return self._parse_csv( 7247 lambda: self.expression( 7248 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7249 ) 7250 ) 7251 7252 self._retreat(index) 7253 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7254 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7255 7256 if self._match_text_seq("ADD", "COLUMNS"): 7257 schema = self._parse_schema() 7258 if schema: 7259 return [schema] 7260 return [] 7261 7262 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7263 7264 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7265 if self._match_texts(self.ALTER_ALTER_PARSERS): 7266 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7267 7268 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7269 # keyword after ALTER we default to parsing this statement 7270 self._match(TokenType.COLUMN) 7271 column = self._parse_field(any_token=True) 7272 7273 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7274 return self.expression(exp.AlterColumn, this=column, drop=True) 7275 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7276 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7277 if self._match(TokenType.COMMENT): 7278 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7279 if self._match_text_seq("DROP", "NOT", "NULL"): 7280 return 
self.expression( 7281 exp.AlterColumn, 7282 this=column, 7283 drop=True, 7284 allow_null=True, 7285 ) 7286 if self._match_text_seq("SET", "NOT", "NULL"): 7287 return self.expression( 7288 exp.AlterColumn, 7289 this=column, 7290 allow_null=False, 7291 ) 7292 7293 if self._match_text_seq("SET", "VISIBLE"): 7294 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7295 if self._match_text_seq("SET", "INVISIBLE"): 7296 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7297 7298 self._match_text_seq("SET", "DATA") 7299 self._match_text_seq("TYPE") 7300 return self.expression( 7301 exp.AlterColumn, 7302 this=column, 7303 dtype=self._parse_types(), 7304 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7305 using=self._match(TokenType.USING) and self._parse_assignment(), 7306 ) 7307 7308 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7309 if self._match_texts(("ALL", "EVEN", "AUTO")): 7310 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7311 7312 self._match_text_seq("KEY", "DISTKEY") 7313 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7314 7315 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7316 if compound: 7317 self._match_text_seq("SORTKEY") 7318 7319 if self._match(TokenType.L_PAREN, advance=False): 7320 return self.expression( 7321 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7322 ) 7323 7324 self._match_texts(("AUTO", "NONE")) 7325 return self.expression( 7326 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7327 ) 7328 7329 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7330 index = self._index - 1 7331 7332 partition_exists = self._parse_exists() 7333 if self._match(TokenType.PARTITION, advance=False): 7334 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7335 7336 self._retreat(index) 7337 return self._parse_csv(self._parse_drop_column) 7338 7339 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7340 if self._match(TokenType.COLUMN): 7341 exists = self._parse_exists() 7342 old_column = self._parse_column() 7343 to = self._match_text_seq("TO") 7344 new_column = self._parse_column() 7345 7346 if old_column is None or to is None or new_column is None: 7347 return None 7348 7349 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7350 7351 self._match_text_seq("TO") 7352 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7353 7354 def _parse_alter_table_set(self) -> exp.AlterSet: 7355 alter_set = self.expression(exp.AlterSet) 7356 7357 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7358 "TABLE", "PROPERTIES" 7359 ): 7360 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7361 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7362 alter_set.set("expressions", [self._parse_assignment()]) 7363 elif self._match_texts(("LOGGED", "UNLOGGED")): 7364 alter_set.set("option", exp.var(self._prev.text.upper())) 7365 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7366 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7367 elif self._match_text_seq("LOCATION"): 7368 alter_set.set("location", self._parse_field()) 7369 elif self._match_text_seq("ACCESS", "METHOD"): 7370 alter_set.set("access_method", self._parse_field()) 7371 
elif self._match_text_seq("TABLESPACE"): 7372 alter_set.set("tablespace", self._parse_field()) 7373 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7374 alter_set.set("file_format", [self._parse_field()]) 7375 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7376 alter_set.set("file_format", self._parse_wrapped_options()) 7377 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7378 alter_set.set("copy_options", self._parse_wrapped_options()) 7379 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7380 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7381 else: 7382 if self._match_text_seq("SERDE"): 7383 alter_set.set("serde", self._parse_field()) 7384 7385 alter_set.set("expressions", [self._parse_properties()]) 7386 7387 return alter_set 7388 7389 def _parse_alter(self) -> exp.Alter | exp.Command: 7390 start = self._prev 7391 7392 alter_token = self._match_set(self.ALTERABLES) and self._prev 7393 if not alter_token: 7394 return self._parse_as_command(start) 7395 7396 exists = self._parse_exists() 7397 only = self._match_text_seq("ONLY") 7398 this = self._parse_table(schema=True) 7399 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7400 7401 if self._next: 7402 self._advance() 7403 7404 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7405 if parser: 7406 actions = ensure_list(parser(self)) 7407 not_valid = self._match_text_seq("NOT", "VALID") 7408 options = self._parse_csv(self._parse_property) 7409 7410 if not self._curr and actions: 7411 return self.expression( 7412 exp.Alter, 7413 this=this, 7414 kind=alter_token.text.upper(), 7415 exists=exists, 7416 actions=actions, 7417 only=only, 7418 options=options, 7419 cluster=cluster, 7420 not_valid=not_valid, 7421 ) 7422 7423 return self._parse_as_command(start) 7424 7425 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7426 start = self._prev 7427 # https://duckdb.org/docs/sql/statements/analyze 7428 if not self._curr: 7429 return self.expression(exp.Analyze) 7430 7431 options = [] 7432 while self._match_texts(self.ANALYZE_STYLES): 7433 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7434 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7435 else: 7436 options.append(self._prev.text.upper()) 7437 7438 this: t.Optional[exp.Expression] = None 7439 inner_expression: t.Optional[exp.Expression] = None 7440 7441 kind = self._curr and self._curr.text.upper() 7442 7443 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7444 this = self._parse_table_parts() 7445 elif self._match_text_seq("TABLES"): 7446 if self._match_set((TokenType.FROM, TokenType.IN)): 7447 kind = f"{kind} {self._prev.text.upper()}" 7448 this = self._parse_table(schema=True, is_db_reference=True) 7449 elif self._match_text_seq("DATABASE"): 7450 this = self._parse_table(schema=True, is_db_reference=True) 7451 elif self._match_text_seq("CLUSTER"): 7452 this = self._parse_table() 7453 # Try matching inner expr keywords before fallback to parse table. 
7454 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7455 kind = None 7456 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7457 else: 7458 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7459 kind = None 7460 this = self._parse_table_parts() 7461 7462 partition = self._try_parse(self._parse_partition) 7463 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7464 return self._parse_as_command(start) 7465 7466 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7467 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7468 "WITH", "ASYNC", "MODE" 7469 ): 7470 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7471 else: 7472 mode = None 7473 7474 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7475 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7476 7477 properties = self._parse_properties() 7478 return self.expression( 7479 exp.Analyze, 7480 kind=kind, 7481 this=this, 7482 mode=mode, 7483 partition=partition, 7484 properties=properties, 7485 expression=inner_expression, 7486 options=options, 7487 ) 7488 7489 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7490 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7491 this = None 7492 kind = self._prev.text.upper() 7493 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7494 expressions = [] 7495 7496 if not self._match_text_seq("STATISTICS"): 7497 self.raise_error("Expecting token STATISTICS") 7498 7499 if self._match_text_seq("NOSCAN"): 7500 this = "NOSCAN" 7501 elif self._match(TokenType.FOR): 7502 if self._match_text_seq("ALL", "COLUMNS"): 7503 this = "FOR ALL COLUMNS" 7504 if self._match_texts("COLUMNS"): 7505 this = "FOR COLUMNS" 7506 expressions = self._parse_csv(self._parse_column_reference) 7507 elif self._match_text_seq("SAMPLE"): 7508 sample = self._parse_number() 7509 expressions = [ 7510 self.expression( 7511 exp.AnalyzeSample, 7512 sample=sample, 7513 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7514 ) 7515 ] 7516 7517 return self.expression( 7518 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7519 ) 7520 7521 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7522 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7523 kind = None 7524 this = None 7525 expression: t.Optional[exp.Expression] = None 7526 if self._match_text_seq("REF", "UPDATE"): 7527 kind = "REF" 7528 this = "UPDATE" 7529 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7530 this = "UPDATE SET DANGLING TO NULL" 7531 elif self._match_text_seq("STRUCTURE"): 7532 kind = "STRUCTURE" 7533 if self._match_text_seq("CASCADE", "FAST"): 7534 this = "CASCADE FAST" 7535 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7536 ("ONLINE", "OFFLINE") 7537 ): 7538 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7539 expression = self._parse_into() 7540 7541 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7542 7543 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7544 this = self._prev.text.upper() 7545 if self._match_text_seq("COLUMNS"): 7546 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7547 return None 7548 7549 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7550 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7551 if self._match_text_seq("STATISTICS"): 7552 return self.expression(exp.AnalyzeDelete, kind=kind) 7553 return None 7554 7555 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7556 if self._match_text_seq("CHAINED", "ROWS"): 7557 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7558 return None 7559 7560 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7561 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7562 this = self._prev.text.upper() 7563 expression: t.Optional[exp.Expression] = None 7564 expressions = [] 7565 update_options = None 7566 7567 if self._match_text_seq("HISTOGRAM", "ON"): 7568 expressions = self._parse_csv(self._parse_column_reference) 7569 with_expressions = [] 7570 while self._match(TokenType.WITH): 7571 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7572 if self._match_texts(("SYNC", "ASYNC")): 7573 if self._match_text_seq("MODE", advance=False): 7574 with_expressions.append(f"{self._prev.text.upper()} MODE") 7575 self._advance() 7576 else: 7577 buckets = self._parse_number() 7578 if self._match_text_seq("BUCKETS"): 7579 with_expressions.append(f"{buckets} BUCKETS") 7580 if with_expressions: 7581 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7582 7583 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7584 TokenType.UPDATE, advance=False 7585 ): 7586 update_options = self._prev.text.upper() 7587 self._advance() 7588 elif self._match_text_seq("USING", "DATA"): 7589 expression = self.expression(exp.UsingData, this=self._parse_string()) 7590 7591 return self.expression( 7592 exp.AnalyzeHistogram, 7593 this=this, 7594 expressions=expressions, 7595 expression=expression, 7596 update_options=update_options, 7597 ) 7598 7599 def _parse_merge(self) -> exp.Merge: 7600 self._match(TokenType.INTO) 7601 target = self._parse_table() 7602 7603 if target and self._match(TokenType.ALIAS, advance=False): 7604 target.set("alias", self._parse_table_alias()) 7605 7606 self._match(TokenType.USING) 7607 using = self._parse_table() 7608 7609 self._match(TokenType.ON) 7610 on = self._parse_assignment() 7611 7612 return self.expression( 7613 exp.Merge, 7614 this=target, 7615 using=using, 7616 on=on, 7617 whens=self._parse_when_matched(), 7618 returning=self._parse_returning(), 7619 ) 7620 7621 def _parse_when_matched(self) -> exp.Whens: 7622 whens = [] 7623 7624 while self._match(TokenType.WHEN): 7625 matched = not self._match(TokenType.NOT) 7626 self._match_text_seq("MATCHED") 7627 source = ( 7628 False 7629 if self._match_text_seq("BY", "TARGET") 7630 else self._match_text_seq("BY", "SOURCE") 7631 ) 7632 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7633 7634 self._match(TokenType.THEN) 7635 7636 if self._match(TokenType.INSERT): 7637 this = self._parse_star() 7638 if this: 7639 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7640 else: 7641 then = self.expression( 7642 exp.Insert, 7643 this=exp.var("ROW") 7644 if self._match_text_seq("ROW") 7645 else self._parse_value(values=False), 7646 expression=self._match_text_seq("VALUES") and self._parse_value(), 7647 ) 7648 elif self._match(TokenType.UPDATE): 7649 expressions = self._parse_star() 7650 if expressions: 7651 then = self.expression(exp.Update, expressions=expressions) 7652 else: 7653 then = self.expression( 7654 exp.Update, 7655 
expressions=self._match(TokenType.SET) 7656 and self._parse_csv(self._parse_equality), 7657 ) 7658 elif self._match(TokenType.DELETE): 7659 then = self.expression(exp.Var, this=self._prev.text) 7660 else: 7661 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7662 7663 whens.append( 7664 self.expression( 7665 exp.When, 7666 matched=matched, 7667 source=source, 7668 condition=condition, 7669 then=then, 7670 ) 7671 ) 7672 return self.expression(exp.Whens, expressions=whens) 7673 7674 def _parse_show(self) -> t.Optional[exp.Expression]: 7675 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7676 if parser: 7677 return parser(self) 7678 return self._parse_as_command(self._prev) 7679 7680 def _parse_set_item_assignment( 7681 self, kind: t.Optional[str] = None 7682 ) -> t.Optional[exp.Expression]: 7683 index = self._index 7684 7685 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7686 return self._parse_set_transaction(global_=kind == "GLOBAL") 7687 7688 left = self._parse_primary() or self._parse_column() 7689 assignment_delimiter = self._match_texts(("=", "TO")) 7690 7691 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7692 self._retreat(index) 7693 return None 7694 7695 right = self._parse_statement() or self._parse_id_var() 7696 if isinstance(right, (exp.Column, exp.Identifier)): 7697 right = exp.var(right.name) 7698 7699 this = self.expression(exp.EQ, this=left, expression=right) 7700 return self.expression(exp.SetItem, this=this, kind=kind) 7701 7702 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7703 self._match_text_seq("TRANSACTION") 7704 characteristics = self._parse_csv( 7705 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7706 ) 7707 return self.expression( 7708 exp.SetItem, 7709 expressions=characteristics, 7710 kind="TRANSACTION", 7711 **{"global": global_}, # type: ignore 7712 ) 7713 7714 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7715 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7716 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7717 7718 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7719 index = self._index 7720 set_ = self.expression( 7721 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7722 ) 7723 7724 if self._curr: 7725 self._retreat(index) 7726 return self._parse_as_command(self._prev) 7727 7728 return set_ 7729 7730 def _parse_var_from_options( 7731 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7732 ) -> t.Optional[exp.Var]: 7733 start = self._curr 7734 if not start: 7735 return None 7736 7737 option = start.text.upper() 7738 continuations = options.get(option) 7739 7740 index = self._index 7741 self._advance() 7742 for keywords in continuations or []: 7743 if isinstance(keywords, str): 7744 keywords = (keywords,) 7745 7746 if self._match_text_seq(*keywords): 7747 option = f"{option} {' '.join(keywords)}" 7748 break 7749 else: 7750 if continuations or continuations is None: 7751 if raise_unmatched: 7752 self.raise_error(f"Unknown option {option}") 7753 7754 self._retreat(index) 7755 return None 7756 7757 return exp.var(option) 7758 7759 def _parse_as_command(self, start: Token) -> exp.Command: 7760 while self._curr: 7761 self._advance() 7762 text = self._find_sql(start, self._prev) 7763 size = len(start.text) 7764 self._warn_unsupported() 7765 return exp.Command(this=text[:size], 
expression=text[size:]) 7766 7767 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7768 settings = [] 7769 7770 self._match_l_paren() 7771 kind = self._parse_id_var() 7772 7773 if self._match(TokenType.L_PAREN): 7774 while True: 7775 key = self._parse_id_var() 7776 value = self._parse_primary() 7777 if not key and value is None: 7778 break 7779 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7780 self._match(TokenType.R_PAREN) 7781 7782 self._match_r_paren() 7783 7784 return self.expression( 7785 exp.DictProperty, 7786 this=this, 7787 kind=kind.this if kind else None, 7788 settings=settings, 7789 ) 7790 7791 def _parse_dict_range(self, this: str) -> exp.DictRange: 7792 self._match_l_paren() 7793 has_min = self._match_text_seq("MIN") 7794 if has_min: 7795 min = self._parse_var() or self._parse_primary() 7796 self._match_text_seq("MAX") 7797 max = self._parse_var() or self._parse_primary() 7798 else: 7799 max = self._parse_var() or self._parse_primary() 7800 min = exp.Literal.number(0) 7801 self._match_r_paren() 7802 return self.expression(exp.DictRange, this=this, min=min, max=max) 7803 7804 def _parse_comprehension( 7805 self, this: t.Optional[exp.Expression] 7806 ) -> t.Optional[exp.Comprehension]: 7807 index = self._index 7808 expression = self._parse_column() 7809 if not self._match(TokenType.IN): 7810 self._retreat(index - 1) 7811 return None 7812 iterator = self._parse_column() 7813 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7814 return self.expression( 7815 exp.Comprehension, 7816 this=this, 7817 expression=expression, 7818 iterator=iterator, 7819 condition=condition, 7820 ) 7821 7822 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7823 if self._match(TokenType.HEREDOC_STRING): 7824 return self.expression(exp.Heredoc, this=self._prev.text) 7825 7826 if not self._match_text_seq("$"): 7827 return None 7828 7829 tags = ["$"] 7830 tag_text = None 7831 7832 if self._is_connected(): 7833 self._advance() 7834 tags.append(self._prev.text.upper()) 7835 else: 7836 self.raise_error("No closing $ found") 7837 7838 if tags[-1] != "$": 7839 if self._is_connected() and self._match_text_seq("$"): 7840 tag_text = tags[-1] 7841 tags.append("$") 7842 else: 7843 self.raise_error("No closing $ found") 7844 7845 heredoc_start = self._curr 7846 7847 while self._curr: 7848 if self._match_text_seq(*tags, advance=False): 7849 this = self._find_sql(heredoc_start, self._prev) 7850 self._advance(len(tags)) 7851 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7852 7853 self._advance() 7854 7855 self.raise_error(f"No closing {''.join(tags)} found") 7856 return None 7857 7858 def _find_parser( 7859 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7860 ) -> t.Optional[t.Callable]: 7861 if not self._curr: 7862 return None 7863 7864 index = self._index 7865 this = [] 7866 while True: 7867 # The current token might be multiple words 7868 curr = self._curr.text.upper() 7869 key = curr.split(" ") 7870 this.append(curr) 7871 7872 self._advance() 7873 result, trie = in_trie(trie, key) 7874 if result == TrieResult.FAILED: 7875 break 7876 7877 if result == TrieResult.EXISTS: 7878 subparser = parsers[" ".join(this)] 7879 return subparser 7880 7881 self._retreat(index) 7882 return None 7883 7884 def _match(self, token_type, advance=True, expression=None): 7885 if not self._curr: 7886 return None 7887 7888 if self._curr.token_type == token_type: 7889 if advance: 7890 self._advance() 7891 self._add_comments(expression) 7892 return 
True 7893 7894 return None 7895 7896 def _match_set(self, types, advance=True): 7897 if not self._curr: 7898 return None 7899 7900 if self._curr.token_type in types: 7901 if advance: 7902 self._advance() 7903 return True 7904 7905 return None 7906 7907 def _match_pair(self, token_type_a, token_type_b, advance=True): 7908 if not self._curr or not self._next: 7909 return None 7910 7911 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7912 if advance: 7913 self._advance(2) 7914 return True 7915 7916 return None 7917 7918 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7919 if not self._match(TokenType.L_PAREN, expression=expression): 7920 self.raise_error("Expecting (") 7921 7922 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7923 if not self._match(TokenType.R_PAREN, expression=expression): 7924 self.raise_error("Expecting )") 7925 7926 def _match_texts(self, texts, advance=True): 7927 if ( 7928 self._curr 7929 and self._curr.token_type != TokenType.STRING 7930 and self._curr.text.upper() in texts 7931 ): 7932 if advance: 7933 self._advance() 7934 return True 7935 return None 7936 7937 def _match_text_seq(self, *texts, advance=True): 7938 index = self._index 7939 for text in texts: 7940 if ( 7941 self._curr 7942 and self._curr.token_type != TokenType.STRING 7943 and self._curr.text.upper() == text 7944 ): 7945 self._advance() 7946 else: 7947 self._retreat(index) 7948 return None 7949 7950 if not advance: 7951 self._retreat(index) 7952 7953 return True 7954 7955 def _replace_lambda( 7956 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7957 ) -> t.Optional[exp.Expression]: 7958 if not node: 7959 return node 7960 7961 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7962 7963 for column in node.find_all(exp.Column): 7964 typ = lambda_types.get(column.parts[0].name) 7965 if typ is not None: 7966 dot_or_id = column.to_dot() if column.table else column.this 7967 7968 if typ: 7969 dot_or_id = self.expression( 7970 exp.Cast, 7971 this=dot_or_id, 7972 to=typ, 7973 ) 7974 7975 parent = column.parent 7976 7977 while isinstance(parent, exp.Dot): 7978 if not isinstance(parent.parent, exp.Dot): 7979 parent.replace(dot_or_id) 7980 break 7981 parent = parent.parent 7982 else: 7983 if column is node: 7984 node = dot_or_id 7985 else: 7986 column.replace(dot_or_id) 7987 return node 7988 7989 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7990 start = self._prev 7991 7992 # Not to be confused with TRUNCATE(number, decimals) function call 7993 if self._match(TokenType.L_PAREN): 7994 self._retreat(self._index - 2) 7995 return self._parse_function() 7996 7997 # Clickhouse supports TRUNCATE DATABASE as well 7998 is_database = self._match(TokenType.DATABASE) 7999 8000 self._match(TokenType.TABLE) 8001 8002 exists = self._parse_exists(not_=False) 8003 8004 expressions = self._parse_csv( 8005 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8006 ) 8007 8008 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8009 8010 if self._match_text_seq("RESTART", "IDENTITY"): 8011 identity = "RESTART" 8012 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8013 identity = "CONTINUE" 8014 else: 8015 identity = None 8016 8017 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8018 option = self._prev.text 8019 else: 8020 option = None 8021 8022 partition = self._parse_partition() 
8023 8024 # Fallback case 8025 if self._curr: 8026 return self._parse_as_command(start) 8027 8028 return self.expression( 8029 exp.TruncateTable, 8030 expressions=expressions, 8031 is_database=is_database, 8032 exists=exists, 8033 cluster=cluster, 8034 identity=identity, 8035 option=option, 8036 partition=partition, 8037 ) 8038 8039 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8040 this = self._parse_ordered(self._parse_opclass) 8041 8042 if not self._match(TokenType.WITH): 8043 return this 8044 8045 op = self._parse_var(any_token=True) 8046 8047 return self.expression(exp.WithOperator, this=this, op=op) 8048 8049 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8050 self._match(TokenType.EQ) 8051 self._match(TokenType.L_PAREN) 8052 8053 opts: t.List[t.Optional[exp.Expression]] = [] 8054 option: exp.Expression | None 8055 while self._curr and not self._match(TokenType.R_PAREN): 8056 if self._match_text_seq("FORMAT_NAME", "="): 8057 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8058 option = self._parse_format_name() 8059 else: 8060 option = self._parse_property() 8061 8062 if option is None: 8063 self.raise_error("Unable to parse option") 8064 break 8065 8066 opts.append(option) 8067 8068 return opts 8069 8070 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8071 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8072 8073 options = [] 8074 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8075 option = self._parse_var(any_token=True) 8076 prev = self._prev.text.upper() 8077 8078 # Different dialects might separate options and values by white space, "=" and "AS" 8079 self._match(TokenType.EQ) 8080 self._match(TokenType.ALIAS) 8081 8082 param = self.expression(exp.CopyParameter, this=option) 8083 8084 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8085 TokenType.L_PAREN, advance=False 8086 ): 8087 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8088 param.set("expressions", self._parse_wrapped_options()) 8089 elif prev == "FILE_FORMAT": 8090 # T-SQL's external file format case 8091 param.set("expression", self._parse_field()) 8092 else: 8093 param.set("expression", self._parse_unquoted_field()) 8094 8095 options.append(param) 8096 self._match(sep) 8097 8098 return options 8099 8100 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8101 expr = self.expression(exp.Credentials) 8102 8103 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8104 expr.set("storage", self._parse_field()) 8105 if self._match_text_seq("CREDENTIALS"): 8106 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8107 creds = ( 8108 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8109 ) 8110 expr.set("credentials", creds) 8111 if self._match_text_seq("ENCRYPTION"): 8112 expr.set("encryption", self._parse_wrapped_options()) 8113 if self._match_text_seq("IAM_ROLE"): 8114 expr.set("iam_role", self._parse_field()) 8115 if self._match_text_seq("REGION"): 8116 expr.set("region", self._parse_field()) 8117 8118 return expr 8119 8120 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8121 return self._parse_field() 8122 8123 def _parse_copy(self) -> exp.Copy | exp.Command: 8124 start = self._prev 8125 8126 self._match(TokenType.INTO) 8127 8128 this = ( 8129 self._parse_select(nested=True, parse_subquery_alias=False) 8130 if self._match(TokenType.L_PAREN, advance=False) 8131 else self._parse_table(schema=True) 
8132 ) 8133 8134 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8135 8136 files = self._parse_csv(self._parse_file_location) 8137 credentials = self._parse_credentials() 8138 8139 self._match_text_seq("WITH") 8140 8141 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8142 8143 # Fallback case 8144 if self._curr: 8145 return self._parse_as_command(start) 8146 8147 return self.expression( 8148 exp.Copy, 8149 this=this, 8150 kind=kind, 8151 credentials=credentials, 8152 files=files, 8153 params=params, 8154 ) 8155 8156 def _parse_normalize(self) -> exp.Normalize: 8157 return self.expression( 8158 exp.Normalize, 8159 this=self._parse_bitwise(), 8160 form=self._match(TokenType.COMMA) and self._parse_var(), 8161 ) 8162 8163 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8164 args = self._parse_csv(lambda: self._parse_lambda()) 8165 8166 this = seq_get(args, 0) 8167 decimals = seq_get(args, 1) 8168 8169 return expr_type( 8170 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8171 ) 8172 8173 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8174 if self._match_text_seq("COLUMNS", "(", advance=False): 8175 this = self._parse_function() 8176 if isinstance(this, exp.Columns): 8177 this.set("unpack", True) 8178 return this 8179 8180 return self.expression( 8181 exp.Star, 8182 **{ # type: ignore 8183 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8184 "replace": self._parse_star_op("REPLACE"), 8185 "rename": self._parse_star_op("RENAME"), 8186 }, 8187 ) 8188 8189 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8190 privilege_parts = [] 8191 8192 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8193 # (end of privilege list) or L_PAREN (start of column list) are met 8194 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8195 privilege_parts.append(self._curr.text.upper()) 8196 self._advance() 8197 8198 this = exp.var(" ".join(privilege_parts)) 8199 expressions = ( 8200 self._parse_wrapped_csv(self._parse_column) 8201 if self._match(TokenType.L_PAREN, advance=False) 8202 else None 8203 ) 8204 8205 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8206 8207 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8208 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8209 principal = self._parse_id_var() 8210 8211 if not principal: 8212 return None 8213 8214 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8215 8216 def _parse_grant(self) -> exp.Grant | exp.Command: 8217 start = self._prev 8218 8219 privileges = self._parse_csv(self._parse_grant_privilege) 8220 8221 self._match(TokenType.ON) 8222 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8223 8224 # Attempt to parse the securable e.g. 
MySQL allows names 8225 # such as "foo.*", "*.*" which are not easily parseable yet 8226 securable = self._try_parse(self._parse_table_parts) 8227 8228 if not securable or not self._match_text_seq("TO"): 8229 return self._parse_as_command(start) 8230 8231 principals = self._parse_csv(self._parse_grant_principal) 8232 8233 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8234 8235 if self._curr: 8236 return self._parse_as_command(start) 8237 8238 return self.expression( 8239 exp.Grant, 8240 privileges=privileges, 8241 kind=kind, 8242 securable=securable, 8243 principals=principals, 8244 grant_option=grant_option, 8245 ) 8246 8247 def _parse_overlay(self) -> exp.Overlay: 8248 return self.expression( 8249 exp.Overlay, 8250 **{ # type: ignore 8251 "this": self._parse_bitwise(), 8252 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8253 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8254 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8255 }, 8256 ) 8257 8258 def _parse_format_name(self) -> exp.Property: 8259 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8260 # for FILE_FORMAT = <format_name> 8261 return self.expression( 8262 exp.Property, 8263 this=exp.var("FORMAT_NAME"), 8264 value=self._parse_string() or self._parse_table_parts(), 8265 ) 8266 8267 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8268 args: t.List[exp.Expression] = [] 8269 8270 if self._match(TokenType.DISTINCT): 8271 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8272 self._match(TokenType.COMMA) 8273 8274 args.extend(self._parse_csv(self._parse_assignment)) 8275 8276 return self.expression( 8277 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8278 ) 8279 8280 def _identifier_expression( 8281 self, token: t.Optional[Token] = None, **kwargs: t.Any 8282 ) -> exp.Identifier: 8283 token = token or self._prev 8284 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8285 expression.update_positions(token) 8286 return expression
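The methods above are internal, but they can be exercised end to end through the public sqlglot API. A minimal sketch, assuming the default dialect (which accepts both of the IGNORE NULLS placements discussed in the _parse_window comments above):

import sqlglot
from sqlglot import exp

# Both placements normalize to an IgnoreNulls node attached to the window
inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
assert inside.find(exp.IgnoreNulls) and outside.find(exp.IgnoreNulls)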
28def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 29 if len(args) == 1 and args[0].is_star: 30 return exp.StarMap(this=args[0]) 31 32 keys = [] 33 values = [] 34 for i in range(0, len(args), 2): 35 keys.append(args[i]) 36 values.append(args[i + 1]) 37 38 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
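A minimal usage sketch: build_var_map pairs up alternating key/value arguments into parallel arrays, while a lone star argument short-circuits to StarMap.

from sqlglot import exp
from sqlglot.parser import build_var_map

# Alternating arguments become parallel key/value arrays
m = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
assert isinstance(m, exp.VarMap)

# A single star argument short-circuits to StarMap
assert isinstance(build_var_map([exp.Star()]), exp.StarMap)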
46def binary_range_parser( 47 expr_type: t.Type[exp.Expression], reverse_args: bool = False 48) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 49 def _parse_binary_range( 50 self: Parser, this: t.Optional[exp.Expression] 51 ) -> t.Optional[exp.Expression]: 52 expression = self._parse_bitwise() 53 if reverse_args: 54 this, expression = expression, this 55 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 56 57 return _parse_binary_range
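binary_range_parser is a factory: it closes over the expression type and returns a callback with the (self, this) shape the parser expects for infix operators, where this is the already-parsed left-hand side. A hedged sketch of how a dialect-level parser subclass might register one (RANGE_PARSERS is where the stock parser keeps these token-to-callback mappings):

from sqlglot import exp
from sqlglot.parser import Parser, binary_range_parser
from sqlglot.tokens import TokenType

class MyParser(Parser):
    # Map an infix token to a callback built by the factory
    RANGE_PARSERS = {
        **Parser.RANGE_PARSERS,
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
    }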
60def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 61 # Default argument order is base, expression 62 this = seq_get(args, 0) 63 expression = seq_get(args, 1) 64 65 if expression: 66 if not dialect.LOG_BASE_FIRST: 67 this, expression = expression, this 68 return exp.Log(this=this, expression=expression) 69 70 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
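A quick check of the default behavior: with two arguments the base comes first unless the dialect unsets LOG_BASE_FIRST, and a lone argument falls back to Ln only when the dialect's parser sets LOG_DEFAULTS_TO_LN.

import sqlglot
from sqlglot import exp

log = sqlglot.parse_one("LOG(2, 32)")
assert isinstance(log, exp.Log)  # this=2 (the base), expression=32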
90def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 91 def _builder(args: t.List, dialect: Dialect) -> E: 92 expression = expr_type( 93 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 94 ) 95 if len(args) > 2 and expr_type is exp.JSONExtract: 96 expression.set("expressions", args[2:]) 97 98 return expression 99 100 return _builder
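A minimal sketch using the default dialect, where JSON_EXTRACT is registered through this builder (see FUNCTIONS below): the raw path string is normalized by dialect.to_json_path, and any arguments past the path are kept on the JSONExtract node.

import sqlglot
from sqlglot import exp

e = sqlglot.parse_one("JSON_EXTRACT(payload, '$.user.id')")
assert isinstance(e, exp.JSONExtract)
# e.expression holds the structured form of '$.user.id'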
103def build_mod(args: t.List) -> exp.Mod: 104 this = seq_get(args, 0) 105 expression = seq_get(args, 1) 106 107 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 108 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 109 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 110 111 return exp.Mod(this=this, expression=expression)
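A round trip shows why the Paren wrapping matters: when the function form is re-rendered as the % operator, the parentheses preserve the original precedence.

import sqlglot

sql = sqlglot.parse_one("MOD(a + 1, 7)").sql()
# sql == '(a + 1) % 7' -- without the Paren node this would render as
# 'a + 1 % 7', which binds differently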
123def build_array_constructor( 124 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 125) -> exp.Expression: 126 array_exp = exp_class(expressions=args) 127 128 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 129 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 130 131 return array_exp
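A hedged sketch of a direct call; in practice the parser supplies these arguments when it encounters an array literal, and bracket_notation is only recorded for dialects where ARRAY(...) and [...] are distinct constructors.

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

arr = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,  # the bracket kind the tokenizer saw
    Dialect(),            # a bare base dialect, for illustration only
)
assert isinstance(arr, exp.Array)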
134def build_convert_timezone( 135 args: t.List, default_source_tz: t.Optional[str] = None 136) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 137 if len(args) == 2: 138 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 139 return exp.ConvertTimezone( 140 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 141 ) 142 143 return exp.ConvertTimezone.from_arg_list(args)
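A minimal usage sketch of the two-argument path, where the missing source timezone is filled from a dialect-supplied default (default_source_tz stays None for dialects that don't define one):

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

two = build_convert_timezone(
    [exp.Literal.string("US/Pacific"), exp.column("ts")],
    default_source_tz="UTC",
)
assert two.args["source_tz"].name == "UTC"  # filled from the default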
176class Parser(metaclass=_Parser): 177 """ 178 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 179 180 Args: 181 error_level: The desired error level. 182 Default: ErrorLevel.IMMEDIATE 183 error_message_context: The amount of context to capture from a query string when displaying 184 the error message (in number of characters). 185 Default: 100 186 max_errors: Maximum number of error messages to include in a raised ParseError. 187 This is only relevant if error_level is ErrorLevel.RAISE. 188 Default: 3 189 """ 190 191 FUNCTIONS: t.Dict[str, t.Callable] = { 192 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 193 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 194 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 195 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 196 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 197 ), 198 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 199 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 200 ), 201 "CHAR": lambda args: exp.Chr(expressions=args), 202 "CHR": lambda args: exp.Chr(expressions=args), 203 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 204 "CONCAT": lambda args, dialect: exp.Concat( 205 expressions=args, 206 safe=not dialect.STRICT_STRING_CONCAT, 207 coalesce=dialect.CONCAT_COALESCE, 208 ), 209 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONVERT_TIMEZONE": build_convert_timezone, 215 "DATE_TO_DATE_STR": lambda args: exp.Cast( 216 this=seq_get(args, 0), 217 to=exp.DataType(this=exp.DataType.Type.TEXT), 218 ), 219 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 220 start=seq_get(args, 0), 221 end=seq_get(args, 1), 222 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 223 ), 224 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 225 "HEX": build_hex, 226 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 227 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 228 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 229 "LIKE": build_like, 230 "LOG": build_logarithm, 231 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 232 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 233 "LOWER": build_lower, 234 "LPAD": lambda args: build_pad(args), 235 "LEFTPAD": lambda args: build_pad(args), 236 "LTRIM": lambda args: build_trim(args), 237 "MOD": build_mod, 238 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 239 "RPAD": lambda args: build_pad(args, is_left=False), 240 "RTRIM": lambda args: build_trim(args, is_left=False), 241 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 242 if len(args) != 2 243 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 244 "STRPOS": exp.StrPosition.from_arg_list, 245 "CHARINDEX": lambda args: build_locate_strposition(args), 246 "INSTR": exp.StrPosition.from_arg_list, 247 "LOCATE": lambda args: build_locate_strposition(args), 248 "TIME_TO_TIME_STR": lambda args: exp.Cast( 249 this=seq_get(args, 0), 250 to=exp.DataType(this=exp.DataType.Type.TEXT), 251 ), 252 "TO_HEX": build_hex, 253 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 254 this=exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 start=exp.Literal.number(1), 259 length=exp.Literal.number(10), 260 ), 261 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 262 "UPPER": build_upper, 263 "VAR_MAP": build_var_map, 264 } 265 266 NO_PAREN_FUNCTIONS = { 267 TokenType.CURRENT_DATE: exp.CurrentDate, 268 TokenType.CURRENT_DATETIME: exp.CurrentDate, 269 TokenType.CURRENT_TIME: exp.CurrentTime, 270 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 271 TokenType.CURRENT_USER: exp.CurrentUser, 272 } 273 274 STRUCT_TYPE_TOKENS = { 275 TokenType.NESTED, 276 TokenType.OBJECT, 277 TokenType.STRUCT, 278 TokenType.UNION, 279 } 280 281 NESTED_TYPE_TOKENS = { 282 TokenType.ARRAY, 283 TokenType.LIST, 284 TokenType.LOWCARDINALITY, 285 TokenType.MAP, 286 TokenType.NULLABLE, 287 TokenType.RANGE, 288 *STRUCT_TYPE_TOKENS, 289 } 290 291 ENUM_TYPE_TOKENS = { 292 TokenType.DYNAMIC, 293 TokenType.ENUM, 294 TokenType.ENUM8, 295 TokenType.ENUM16, 296 } 297 298 AGGREGATE_TYPE_TOKENS = { 299 TokenType.AGGREGATEFUNCTION, 300 TokenType.SIMPLEAGGREGATEFUNCTION, 301 } 302 303 TYPE_TOKENS = { 304 TokenType.BIT, 305 TokenType.BOOLEAN, 306 TokenType.TINYINT, 307 TokenType.UTINYINT, 308 TokenType.SMALLINT, 309 TokenType.USMALLINT, 310 TokenType.INT, 311 TokenType.UINT, 312 TokenType.BIGINT, 313 TokenType.UBIGINT, 314 TokenType.INT128, 315 TokenType.UINT128, 316 TokenType.INT256, 317 TokenType.UINT256, 318 TokenType.MEDIUMINT, 319 TokenType.UMEDIUMINT, 320 TokenType.FIXEDSTRING, 321 TokenType.FLOAT, 322 TokenType.DOUBLE, 323 TokenType.UDOUBLE, 324 TokenType.CHAR, 325 TokenType.NCHAR, 326 TokenType.VARCHAR, 327 TokenType.NVARCHAR, 328 TokenType.BPCHAR, 329 TokenType.TEXT, 330 TokenType.MEDIUMTEXT, 331 TokenType.LONGTEXT, 332 TokenType.BLOB, 333 TokenType.MEDIUMBLOB, 334 TokenType.LONGBLOB, 335 TokenType.BINARY, 336 TokenType.VARBINARY, 337 TokenType.JSON, 338 TokenType.JSONB, 339 TokenType.INTERVAL, 340 TokenType.TINYBLOB, 341 TokenType.TINYTEXT, 342 TokenType.TIME, 343 TokenType.TIMETZ, 344 TokenType.TIMESTAMP, 345 TokenType.TIMESTAMP_S, 346 TokenType.TIMESTAMP_MS, 347 TokenType.TIMESTAMP_NS, 348 TokenType.TIMESTAMPTZ, 349 TokenType.TIMESTAMPLTZ, 350 TokenType.TIMESTAMPNTZ, 351 TokenType.DATETIME, 352 TokenType.DATETIME2, 353 TokenType.DATETIME64, 354 TokenType.SMALLDATETIME, 355 TokenType.DATE, 356 TokenType.DATE32, 357 TokenType.INT4RANGE, 358 TokenType.INT4MULTIRANGE, 359 TokenType.INT8RANGE, 360 TokenType.INT8MULTIRANGE, 361 TokenType.NUMRANGE, 362 TokenType.NUMMULTIRANGE, 363 TokenType.TSRANGE, 364 TokenType.TSMULTIRANGE, 365 TokenType.TSTZRANGE, 366 TokenType.TSTZMULTIRANGE, 367 TokenType.DATERANGE, 368 TokenType.DATEMULTIRANGE, 369 TokenType.DECIMAL, 370 TokenType.DECIMAL32, 371 TokenType.DECIMAL64, 372 TokenType.DECIMAL128, 373 TokenType.DECIMAL256, 374 TokenType.UDECIMAL, 375 TokenType.BIGDECIMAL, 376 TokenType.UUID, 377 TokenType.GEOGRAPHY, 378 TokenType.GEOMETRY, 379 TokenType.POINT, 380 TokenType.RING, 381 TokenType.LINESTRING, 382 TokenType.MULTILINESTRING, 383 TokenType.POLYGON, 384 TokenType.MULTIPOLYGON, 385 TokenType.HLLSKETCH, 386 TokenType.HSTORE, 387 TokenType.PSEUDO_TYPE, 388 TokenType.SUPER, 389 TokenType.SERIAL, 390 TokenType.SMALLSERIAL, 391 TokenType.BIGSERIAL, 392 TokenType.XML, 393 TokenType.YEAR, 394 TokenType.USERDEFINED, 395 TokenType.MONEY, 396 TokenType.SMALLMONEY, 397 TokenType.ROWVERSION, 398 TokenType.IMAGE, 399 TokenType.VARIANT, 400 TokenType.VECTOR, 401 

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)
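
    # Because these keyword tokens are accepted as identifiers, queries such as
    # "SELECT 1 AS filter" parse cleanly instead of erroring on the
    # reserved-looking name (illustrative, default dialect):
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT 1 AS filter").selects[0].alias  # 'filter'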

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
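
    # Sketch of what this table drives (default dialect): after the left-hand
    # column is parsed, a following operator token dispatches here, so roughly
    # "col::int" becomes exp.Cast and "col -> '$.a'" becomes exp.JSONExtract:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT col::int").selects[0]  # Cast(this=Column(...), to=DataType(...))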

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
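
    # Statement parsing is a single dispatch on the first meaningful token: the
    # matched TokenType selects the parser above, and anything unmatched falls
    # through to the command/expression handling in _parse_statement below.
    # Illustrative behavior (default dialect):
    #
    #     import sqlglot
    #     type(sqlglot.parse_one("DROP TABLE t"))       # exp.Drop
    #     type(sqlglot.parse_one("UPDATE t SET x = 1")) # exp.Update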

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
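
    # Range parsers receive the already-parsed left-hand side and produce the
    # comparison node, e.g. "x BETWEEN 1 AND 2" -> exp.Between and
    # "x IN (1, 2)" -> exp.In; binary_range_parser covers the simple
    # two-operand cases such as LIKE/GLOB (illustrative, default dialect):
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT x IN (1, 2)").selects[0]  # In(this=Column(...), expressions=[...])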

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
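
    # Each key here is matched as keyword text during DDL parsing and the
    # callback consumes the rest of the clause, e.g. "ENGINE = InnoDB" goes
    # through _parse_property_assignment and becomes exp.EngineProperty inside
    # the statement's exp.Properties (illustrative):
    #
    #     import sqlglot
    #     sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")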

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
"PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1097 "PERIOD": lambda self: self._parse_period_for_system_time(), 1098 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1099 "REFERENCES": lambda self: self._parse_references(match=False), 1100 "TITLE": lambda self: self.expression( 1101 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1102 ), 1103 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1104 "UNIQUE": lambda self: self._parse_unique(), 1105 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1106 "WATERMARK": lambda self: self.expression( 1107 exp.WatermarkColumnConstraint, 1108 this=self._match(TokenType.FOR) and self._parse_column(), 1109 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1110 ), 1111 "WITH": lambda self: self.expression( 1112 exp.Properties, expressions=self._parse_wrapped_properties() 1113 ), 1114 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1115 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1116 } 1117 1118 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1119 klass = ( 1120 exp.PartitionedByBucket 1121 if self._prev.text.upper() == "BUCKET" 1122 else exp.PartitionByTruncate 1123 ) 1124 1125 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1126 this, expression = seq_get(args, 0), seq_get(args, 1) 1127 1128 if isinstance(this, exp.Literal): 1129 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1130 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1131 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1132 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1133 # 1134 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1135 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1136 this, expression = expression, this 1137 1138 return self.expression(klass, this=this, expression=expression) 1139 1140 ALTER_PARSERS = { 1141 "ADD": lambda self: self._parse_alter_table_add(), 1142 "AS": lambda self: self._parse_select(), 1143 "ALTER": lambda self: self._parse_alter_table_alter(), 1144 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1145 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1146 "DROP": lambda self: self._parse_alter_table_drop(), 1147 "RENAME": lambda self: self._parse_alter_table_rename(), 1148 "SET": lambda self: self._parse_alter_table_set(), 1149 "SWAP": lambda self: self.expression( 1150 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1151 ), 1152 } 1153 1154 ALTER_ALTER_PARSERS = { 1155 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1156 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1157 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1158 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1159 } 1160 1161 SCHEMA_UNNAMED_CONSTRAINTS = { 1162 "CHECK", 1163 "EXCLUDE", 1164 "FOREIGN KEY", 1165 "LIKE", 1166 "PERIOD", 1167 "PRIMARY KEY", 1168 "UNIQUE", 1169 "WATERMARK", 1170 "BUCKET", 1171 "TRUNCATE", 1172 } 1173 1174 NO_PAREN_FUNCTION_PARSERS = { 1175 "ANY": lambda 

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
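
    # These functions have non-standard call syntax that a generic argument-list
    # parser can't handle, e.g. CAST(x AS INT) or EXTRACT(YEAR FROM ts), so each
    # gets a dedicated parse method (illustrative, default dialect):
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT CAST(x AS INT)").selects[0]  # Cast(this=Column(...), to=DataType(...))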

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}
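
    # OPTIONS_TYPE tables such as TRANSACTION_CHARACTERISTICS above map a
    # leading keyword to the keyword sequences allowed to follow it; an empty
    # tuple means the keyword stands alone. Roughly, that table accepts e.g.
    # "ISOLATION LEVEL READ COMMITTED" or "READ ONLY", matched token by token
    # via _parse_var_from_options.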

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None
"=") for assignments 1461 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1462 1463 # Whether the TRIM function expects the characters to trim as its first argument 1464 TRIM_PATTERN_FIRST = False 1465 1466 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1467 STRING_ALIASES = False 1468 1469 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1470 MODIFIERS_ATTACHED_TO_SET_OP = True 1471 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1472 1473 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1474 NO_PAREN_IF_COMMANDS = True 1475 1476 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1477 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1478 1479 # Whether the `:` operator is used to extract a value from a VARIANT column 1480 COLON_IS_VARIANT_EXTRACT = False 1481 1482 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1483 # If this is True and '(' is not found, the keyword will be treated as an identifier 1484 VALUES_FOLLOWED_BY_PAREN = True 1485 1486 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1487 SUPPORTS_IMPLICIT_UNNEST = False 1488 1489 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1490 INTERVAL_SPANS = True 1491 1492 # Whether a PARTITION clause can follow a table reference 1493 SUPPORTS_PARTITION_SELECTION = False 1494 1495 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1496 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1497 1498 # Whether the 'AS' keyword is optional in the CTE definition syntax 1499 OPTIONAL_ALIAS_TOKEN_CTE = True 1500 1501 __slots__ = ( 1502 "error_level", 1503 "error_message_context", 1504 "max_errors", 1505 "dialect", 1506 "sql", 1507 "errors", 1508 "_tokens", 1509 "_index", 1510 "_curr", 1511 "_next", 1512 "_prev", 1513 "_prev_comments", 1514 ) 1515 1516 # Autofilled 1517 SHOW_TRIE: t.Dict = {} 1518 SET_TRIE: t.Dict = {} 1519 1520 def __init__( 1521 self, 1522 error_level: t.Optional[ErrorLevel] = None, 1523 error_message_context: int = 100, 1524 max_errors: int = 3, 1525 dialect: DialectType = None, 1526 ): 1527 from sqlglot.dialects import Dialect 1528 1529 self.error_level = error_level or ErrorLevel.IMMEDIATE 1530 self.error_message_context = error_message_context 1531 self.max_errors = max_errors 1532 self.dialect = Dialect.get_or_raise(dialect) 1533 self.reset() 1534 1535 def reset(self): 1536 self.sql = "" 1537 self.errors = [] 1538 self._tokens = [] 1539 self._index = 0 1540 self._curr = None 1541 self._next = None 1542 self._prev = None 1543 self._prev_comments = None 1544 1545 def parse( 1546 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1547 ) -> t.List[t.Optional[exp.Expression]]: 1548 """ 1549 Parses a list of tokens and returns a list of syntax trees, one tree 1550 per parsed SQL statement. 1551 1552 Args: 1553 raw_tokens: The list of tokens. 1554 sql: The original SQL string, used to produce helpful debug messages. 1555 1556 Returns: 1557 The list of the produced syntax trees. 1558 """ 1559 return self._parse( 1560 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1561 ) 1562 1563 def parse_into( 1564 self, 1565 expression_types: exp.IntoType, 1566 raw_tokens: t.List[Token], 1567 sql: t.Optional[str] = None, 1568 ) -> t.List[t.Optional[exp.Expression]]: 1569 """ 1570 Parses a list of tokens into a given Expression type. 

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
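
    # Illustrative usage: constrain the result to a known shape via the
    # EXPRESSION_PARSERS table, e.g. accept only a WHERE clause:
    #
    #     sql = "WHERE x > 1"
    #     where = Parser().parse_into(exp.Where, Tokenizer().tokenize(sql), sql)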

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
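
    # Note on the fallthrough above: statements whose leading keyword is in the
    # dialect tokenizer's COMMANDS set are not parsed structurally; they become
    # opaque exp.Command nodes (with a warning from _warn_unsupported), which
    # keeps round-tripping possible for syntax sqlglot doesn't model.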

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
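    # Illustrative (not exhaustive) input for the option loop below:
    #   CREATE SEQUENCE s INCREMENT BY 2 MINVALUE 0 MAXVALUE 100 START WITH 10 CACHE 5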
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows an empty CACHE, which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # Only used for Teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None
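    # e.g. Teradata's "FALLBACK PROTECTION" / "NO FALLBACK" table options
    # (an illustrative example of the pre-name properties parsed above)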
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse T-SQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
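    # e.g. StarRocks/Doris-style distribution clauses (illustrative):
    #   DISTRIBUTED BY HASH (id) BUCKETS 10
    #   DISTRIBUTED BY RANDOM BUCKETS AUTO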
    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )
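    # e.g. Teradata's locking modifier (illustrative): LOCKING TABLE t FOR ACCESS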
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
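    # e.g. Teradata's CREATE TABLE ... AS ... WITH [NO] DATA [AND [NO] STATISTICS]
    # (an illustrative shape for the flags parsed below)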
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )
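    # Illustrative RETURNS variants handled below (hedged, dialect-dependent):
    #   RETURNS INT                      -> scalar type
    #   RETURNS TABLE <a INT, b STRING>  -> BigQuery-style table signature
    #   RETURNS NULL ON NULL INPUT       -> null=True, no type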
    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
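    # A minimal sketch (illustrative, via the public API) of inputs that reach
    # the INSERT parser below:
    #
    #   sqlglot.parse_one("INSERT INTO t (a, b) VALUES (1, 2)")        # -> exp.Insert
    #   sqlglot.parse_one("INSERT OVERWRITE TABLE t SELECT * FROM s")  # Hive-style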
    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
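    # e.g. Hive-style row formats (illustrative):
    #   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
    #   ROW FORMAT SERDE '<serde class>' WITH SERDEPROPERTIES ('k' = 'v')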
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
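    # e.g. Spark's caching syntax (illustrative):
    #   CACHE LAZY TABLE t OPTIONS('storageLevel' = 'DISK_ONLY') AS SELECT * FROM s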
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table()
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into an exp.Table to pass through parse_query_modifiers,
            # in case a modifier (e.g. a join) follows
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this
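    # Rough map of what _parse_select accepts (a hedged summary, not a grammar):
    #   WITH ... SELECT ...         -- CTEs, handled first
    #   SELECT [ALL | DISTINCT] ...
    #   (SELECT ...) [AS alias]     -- via _parse_wrapped_select when nested/table
    #   VALUES (1), (2)             -- derived table values
    #   FROM t [SELECT ...]         -- DuckDB's FROM-first syntax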
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM, e.g. FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
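    # e.g. the SQL-standard recursive CTE search clause (illustrative):
    #   WITH RECURSIVE t AS (...) SEARCH DEPTH FIRST BY id SET ordercol SELECT ...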
    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if that fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()
    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )
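    # Illustrative MATCH_RECOGNIZE skeleton (hedged; Snowflake/Oracle-style):
    #   SELECT * FROM t MATCH_RECOGNIZE (
    #     PARTITION BY a ORDER BY b
    #     MEASURES FINAL LAST(x) AS last_x
    #     ONE ROW PER MATCH AFTER MATCH SKIP PAST LAST ROW
    #     PATTERN (A B+) DEFINE B AS B.x > A.x
    #   ) AS mr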
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
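    # Illustrative join forms handled below (hedged): "FROM a, b" (comma join),
    # "LEFT OUTER JOIN b ON ..." / "JOIN b USING (x)", T-SQL's "CROSS/OUTER APPLY",
    # and Snowflake's "ASOF JOIN ... MATCH_CONDITION (...)".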
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
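    # e.g. Postgres-style index definitions (illustrative):
    #   CREATE UNIQUE INDEX idx ON t USING btree (a, b) INCLUDE (c) WHERE a > 0
    #   CREATE INDEX ON t (c)   -- anonymous; see _parse_create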
3809 index = self._parse_id_var() 3810 table = None 3811 3812 params = self._parse_index_params() 3813 3814 return self.expression( 3815 exp.Index, 3816 this=index, 3817 table=table, 3818 unique=unique, 3819 primary=primary, 3820 amp=amp, 3821 params=params, 3822 ) 3823 3824 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3825 hints: t.List[exp.Expression] = [] 3826 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3827 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3828 hints.append( 3829 self.expression( 3830 exp.WithTableHint, 3831 expressions=self._parse_csv( 3832 lambda: self._parse_function() or self._parse_var(any_token=True) 3833 ), 3834 ) 3835 ) 3836 self._match_r_paren() 3837 else: 3838 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3839 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3840 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3841 3842 self._match_set((TokenType.INDEX, TokenType.KEY)) 3843 if self._match(TokenType.FOR): 3844 hint.set("target", self._advance_any() and self._prev.text.upper()) 3845 3846 hint.set("expressions", self._parse_wrapped_id_vars()) 3847 hints.append(hint) 3848 3849 return hints or None 3850 3851 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3852 return ( 3853 (not schema and self._parse_function(optional_parens=False)) 3854 or self._parse_id_var(any_token=False) 3855 or self._parse_string_as_identifier() 3856 or self._parse_placeholder() 3857 ) 3858 3859 def _parse_table_parts( 3860 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3861 ) -> exp.Table: 3862 catalog = None 3863 db = None 3864 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3865 3866 while self._match(TokenType.DOT): 3867 if catalog: 3868 # This allows nesting the table in arbitrarily many dot expressions if needed 3869 table = self.expression( 3870 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3871 ) 3872 else: 3873 catalog = db 3874 db = table 3875 # "" used for tsql FROM a..b case 3876 table = self._parse_table_part(schema=schema) or "" 3877 3878 if ( 3879 wildcard 3880 and self._is_connected() 3881 and (isinstance(table, exp.Identifier) or not table) 3882 and self._match(TokenType.STAR) 3883 ): 3884 if isinstance(table, exp.Identifier): 3885 table.args["this"] += "*" 3886 else: 3887 table = exp.Identifier(this="*") 3888 3889 # We bubble up comments from the Identifier to the Table 3890 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3891 3892 if is_db_reference: 3893 catalog = db 3894 db = table 3895 table = None 3896 3897 if not table and not is_db_reference: 3898 self.raise_error(f"Expected table name but got {self._curr}") 3899 if not db and is_db_reference: 3900 self.raise_error(f"Expected database name but got {self._curr}") 3901 3902 table = self.expression( 3903 exp.Table, 3904 comments=comments, 3905 this=table, 3906 db=db, 3907 catalog=catalog, 3908 ) 3909 3910 changes = self._parse_changes() 3911 if changes: 3912 table.set("changes", changes) 3913 3914 at_before = self._parse_historical_data() 3915 if at_before: 3916 table.set("when", at_before) 3917 3918 pivots = self._parse_pivots() 3919 if pivots: 3920 table.set("pivots", pivots) 3921 3922 return table 3923 3924 def _parse_table( 3925 self, 3926 schema: bool = False, 3927 joins: bool = False, 3928 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3929 
parse_bracket: bool = False, 3930 is_db_reference: bool = False, 3931 parse_partition: bool = False, 3932 ) -> t.Optional[exp.Expression]: 3933 lateral = self._parse_lateral() 3934 if lateral: 3935 return lateral 3936 3937 unnest = self._parse_unnest() 3938 if unnest: 3939 return unnest 3940 3941 values = self._parse_derived_table_values() 3942 if values: 3943 return values 3944 3945 subquery = self._parse_select(table=True) 3946 if subquery: 3947 if not subquery.args.get("pivots"): 3948 subquery.set("pivots", self._parse_pivots()) 3949 return subquery 3950 3951 bracket = parse_bracket and self._parse_bracket(None) 3952 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3953 3954 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3955 self._parse_table 3956 ) 3957 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3958 3959 only = self._match(TokenType.ONLY) 3960 3961 this = t.cast( 3962 exp.Expression, 3963 bracket 3964 or rows_from 3965 or self._parse_bracket( 3966 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3967 ), 3968 ) 3969 3970 if only: 3971 this.set("only", only) 3972 3973 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3974 self._match_text_seq("*") 3975 3976 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3977 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3978 this.set("partition", self._parse_partition()) 3979 3980 if schema: 3981 return self._parse_schema(this=this) 3982 3983 version = self._parse_version() 3984 3985 if version: 3986 this.set("version", version) 3987 3988 if self.dialect.ALIAS_POST_TABLESAMPLE: 3989 this.set("sample", self._parse_table_sample()) 3990 3991 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3992 if alias: 3993 this.set("alias", alias) 3994 3995 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3996 return self.expression( 3997 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3998 ) 3999 4000 this.set("hints", self._parse_table_hints()) 4001 4002 if not this.args.get("pivots"): 4003 this.set("pivots", self._parse_pivots()) 4004 4005 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4006 this.set("sample", self._parse_table_sample()) 4007 4008 if joins: 4009 for join in self._parse_joins(): 4010 this.append("joins", join) 4011 4012 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4013 this.set("ordinality", True) 4014 this.set("alias", self._parse_table_alias()) 4015 4016 return this 4017 4018 def _parse_version(self) -> t.Optional[exp.Version]: 4019 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4020 this = "TIMESTAMP" 4021 elif self._match(TokenType.VERSION_SNAPSHOT): 4022 this = "VERSION" 4023 else: 4024 return None 4025 4026 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4027 kind = self._prev.text.upper() 4028 start = self._parse_bitwise() 4029 self._match_texts(("TO", "AND")) 4030 end = self._parse_bitwise() 4031 expression: t.Optional[exp.Expression] = self.expression( 4032 exp.Tuple, expressions=[start, end] 4033 ) 4034 elif self._match_text_seq("CONTAINED", "IN"): 4035 kind = "CONTAINED IN" 4036 expression = self.expression( 4037 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4038 ) 4039 elif self._match(TokenType.ALL): 4040 kind = "ALL" 4041 expression = None 4042 else: 4043 self._match_text_seq("AS", "OF") 4044 kind = "AS OF" 4045 
expression = self._parse_type() 4046 4047 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4048 4049 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4050 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4051 index = self._index 4052 historical_data = None 4053 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4054 this = self._prev.text.upper() 4055 kind = ( 4056 self._match(TokenType.L_PAREN) 4057 and self._match_texts(self.HISTORICAL_DATA_KIND) 4058 and self._prev.text.upper() 4059 ) 4060 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4061 4062 if expression: 4063 self._match_r_paren() 4064 historical_data = self.expression( 4065 exp.HistoricalData, this=this, kind=kind, expression=expression 4066 ) 4067 else: 4068 self._retreat(index) 4069 4070 return historical_data 4071 4072 def _parse_changes(self) -> t.Optional[exp.Changes]: 4073 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4074 return None 4075 4076 information = self._parse_var(any_token=True) 4077 self._match_r_paren() 4078 4079 return self.expression( 4080 exp.Changes, 4081 information=information, 4082 at_before=self._parse_historical_data(), 4083 end=self._parse_historical_data(), 4084 ) 4085 4086 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4087 if not self._match(TokenType.UNNEST): 4088 return None 4089 4090 expressions = self._parse_wrapped_csv(self._parse_equality) 4091 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4092 4093 alias = self._parse_table_alias() if with_alias else None 4094 4095 if alias: 4096 if self.dialect.UNNEST_COLUMN_ONLY: 4097 if alias.args.get("columns"): 4098 self.raise_error("Unexpected extra column alias in unnest.") 4099 4100 alias.set("columns", [alias.this]) 4101 alias.set("this", None) 4102 4103 columns = alias.args.get("columns") or [] 4104 if offset and len(expressions) < len(columns): 4105 offset = columns.pop() 4106 4107 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4108 self._match(TokenType.ALIAS) 4109 offset = self._parse_id_var( 4110 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4111 ) or exp.to_identifier("offset") 4112 4113 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4114 4115 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4116 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4117 if not is_derived and not ( 4118 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4119 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4120 ): 4121 return None 4122 4123 expressions = self._parse_csv(self._parse_value) 4124 alias = self._parse_table_alias() 4125 4126 if is_derived: 4127 self._match_r_paren() 4128 4129 return self.expression( 4130 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4131 ) 4132 4133 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4134 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4135 as_modifier and self._match_text_seq("USING", "SAMPLE") 4136 ): 4137 return None 4138 4139 bucket_numerator = None 4140 bucket_denominator = None 4141 bucket_field = None 4142 percent = None 4143 size = None 4144 seed = None 4145 4146 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4147 matched_l_paren = self._match(TokenType.L_PAREN) 4148 4149 if self.TABLESAMPLE_CSV: 4150 num = None 4151 expressions = 
self._parse_csv(self._parse_primary) 4152 else: 4153 expressions = None 4154 num = ( 4155 self._parse_factor() 4156 if self._match(TokenType.NUMBER, advance=False) 4157 else self._parse_primary() or self._parse_placeholder() 4158 ) 4159 4160 if self._match_text_seq("BUCKET"): 4161 bucket_numerator = self._parse_number() 4162 self._match_text_seq("OUT", "OF") 4163 bucket_denominator = self._parse_number() 4164 self._match(TokenType.ON) 4165 bucket_field = self._parse_field() 4166 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4167 percent = num 4168 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4169 size = num 4170 else: 4171 percent = num 4172 4173 if matched_l_paren: 4174 self._match_r_paren() 4175 4176 if self._match(TokenType.L_PAREN): 4177 method = self._parse_var(upper=True) 4178 seed = self._match(TokenType.COMMA) and self._parse_number() 4179 self._match_r_paren() 4180 elif self._match_texts(("SEED", "REPEATABLE")): 4181 seed = self._parse_wrapped(self._parse_number) 4182 4183 if not method and self.DEFAULT_SAMPLING_METHOD: 4184 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4185 4186 return self.expression( 4187 exp.TableSample, 4188 expressions=expressions, 4189 method=method, 4190 bucket_numerator=bucket_numerator, 4191 bucket_denominator=bucket_denominator, 4192 bucket_field=bucket_field, 4193 percent=percent, 4194 size=size, 4195 seed=seed, 4196 ) 4197 4198 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4199 return list(iter(self._parse_pivot, None)) or None 4200 4201 def _parse_joins(self) -> t.Iterator[exp.Join]: 4202 return iter(self._parse_join, None) 4203 4204 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4205 if not self._match(TokenType.INTO): 4206 return None 4207 4208 return self.expression( 4209 exp.UnpivotColumns, 4210 this=self._match_text_seq("NAME") and self._parse_column(), 4211 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4212 ) 4213 4214 # https://duckdb.org/docs/sql/statements/pivot 4215 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4216 def _parse_on() -> t.Optional[exp.Expression]: 4217 this = self._parse_bitwise() 4218 4219 if self._match(TokenType.IN): 4220 # PIVOT ... ON col IN (row_val1, row_val2) 4221 return self._parse_in(this) 4222 if self._match(TokenType.ALIAS, advance=False): 4223 # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4224 return self._parse_alias(this) 4225 4226 return this 4227 4228 this = self._parse_table() 4229 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4230 into = self._parse_unpivot_columns() 4231 using = self._match(TokenType.USING) and self._parse_csv( 4232 lambda: self._parse_alias(self._parse_function()) 4233 ) 4234 group = self._parse_group() 4235 4236 return self.expression( 4237 exp.Pivot, 4238 this=this, 4239 expressions=expressions, 4240 using=using, 4241 group=group, 4242 unpivot=is_unpivot, 4243 into=into, 4244 ) 4245 4246 def _parse_pivot_in(self) -> exp.In: 4247 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4248 this = self._parse_select_or_expression() 4249 4250 self._match(TokenType.ALIAS) 4251 alias = self._parse_bitwise() 4252 if alias: 4253 if isinstance(alias, exp.Column) and not alias.db: 4254 alias = alias.this 4255 return self.expression(exp.PivotAlias, this=this, alias=alias) 4256 4257 return this 4258 4259 value = self._parse_column() 4260 4261 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4262 self.raise_error("Expecting IN (") 4263 4264 if self._match(TokenType.ANY): 4265 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4266 else: 4267 exprs = self._parse_csv(_parse_aliased_expression) 4268 4269 self._match_r_paren() 4270 return self.expression(exp.In, this=value, expressions=exprs) 4271 4272 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4273 index = self._index 4274 include_nulls = None 4275 4276 if self._match(TokenType.PIVOT): 4277 unpivot = False 4278 elif self._match(TokenType.UNPIVOT): 4279 unpivot = True 4280 4281 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4282 if self._match_text_seq("INCLUDE", "NULLS"): 4283 include_nulls = True 4284 elif self._match_text_seq("EXCLUDE", "NULLS"): 4285 include_nulls = False 4286 else: 4287 return None 4288 4289 expressions = [] 4290 4291 if not self._match(TokenType.L_PAREN): 4292 self._retreat(index) 4293 return None 4294 4295 if unpivot: 4296 expressions = self._parse_csv(self._parse_column) 4297 else: 4298 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4299 4300 if not expressions: 4301 self.raise_error("Failed to parse PIVOT's aggregation list") 4302 4303 if not self._match(TokenType.FOR): 4304 self.raise_error("Expecting FOR") 4305 4306 fields = [] 4307 while True: 4308 field = self._try_parse(self._parse_pivot_in) 4309 if not field: 4310 break 4311 fields.append(field) 4312 4313 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4314 self._parse_bitwise 4315 ) 4316 4317 group = self._parse_group() 4318 4319 self._match_r_paren() 4320 4321 pivot = self.expression( 4322 exp.Pivot, 4323 expressions=expressions, 4324 fields=fields, 4325 unpivot=unpivot, 4326 include_nulls=include_nulls, 4327 default_on_null=default_on_null, 4328 group=group, 4329 ) 4330 4331 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4332 pivot.set("alias", self._parse_table_alias()) 4333 4334 if not unpivot: 4335 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4336 4337 columns: t.List[exp.Expression] = [] 4338 all_fields = [] 4339 for pivot_field in pivot.fields: 4340 pivot_field_expressions = pivot_field.expressions 4341 4342 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
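# Such dynamic pivot fields are skipped here, since their output columns are only known at run time.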
4343 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4344 continue 4345 4346 all_fields.append( 4347 [ 4348 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4349 for fld in pivot_field_expressions 4350 ] 4351 ) 4352 4353 if all_fields: 4354 if names: 4355 all_fields.append(names) 4356 4357 # Generate all possible combinations of the pivot columns 4358 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4359 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4360 for fld_parts_tuple in itertools.product(*all_fields): 4361 fld_parts = list(fld_parts_tuple) 4362 4363 if names and self.PREFIXED_PIVOT_COLUMNS: 4364 # Move the "name" to the front of the list 4365 fld_parts.insert(0, fld_parts.pop(-1)) 4366 4367 columns.append(exp.to_identifier("_".join(fld_parts))) 4368 4369 pivot.set("columns", columns) 4370 4371 return pivot 4372 4373 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4374 return [agg.alias for agg in aggregations if agg.alias] 4375 4376 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4377 if not skip_where_token and not self._match(TokenType.PREWHERE): 4378 return None 4379 4380 return self.expression( 4381 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4382 ) 4383 4384 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4385 if not skip_where_token and not self._match(TokenType.WHERE): 4386 return None 4387 4388 return self.expression( 4389 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4390 ) 4391 4392 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4393 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4394 return None 4395 4396 elements: t.Dict[str, t.Any] = defaultdict(list) 4397 4398 if self._match(TokenType.ALL): 4399 elements["all"] = True 4400 elif self._match(TokenType.DISTINCT): 4401 elements["all"] = False 4402 4403 while True: 4404 index = self._index 4405 4406 elements["expressions"].extend( 4407 self._parse_csv( 4408 lambda: None 4409 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4410 else self._parse_assignment() 4411 ) 4412 ) 4413 4414 before_with_index = self._index 4415 with_prefix = self._match(TokenType.WITH) 4416 4417 if self._match(TokenType.ROLLUP): 4418 elements["rollup"].append( 4419 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4420 ) 4421 elif self._match(TokenType.CUBE): 4422 elements["cube"].append( 4423 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4424 ) 4425 elif self._match(TokenType.GROUPING_SETS): 4426 elements["grouping_sets"].append( 4427 self.expression( 4428 exp.GroupingSets, 4429 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4430 ) 4431 ) 4432 elif self._match_text_seq("TOTALS"): 4433 elements["totals"] = True # type: ignore 4434 4435 if before_with_index <= self._index <= before_with_index + 1: 4436 self._retreat(before_with_index) 4437 break 4438 4439 if index == self._index: 4440 break 4441 4442 return self.expression(exp.Group, **elements) # type: ignore 4443 4444 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4445 return self.expression( 4446 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4447 ) 4448 4449 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4450 if 
self._match(TokenType.L_PAREN): 4451 grouping_set = self._parse_csv(self._parse_column) 4452 self._match_r_paren() 4453 return self.expression(exp.Tuple, expressions=grouping_set) 4454 4455 return self._parse_column() 4456 4457 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4458 if not skip_having_token and not self._match(TokenType.HAVING): 4459 return None 4460 return self.expression(exp.Having, this=self._parse_assignment()) 4461 4462 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4463 if not self._match(TokenType.QUALIFY): 4464 return None 4465 return self.expression(exp.Qualify, this=self._parse_assignment()) 4466 4467 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4468 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4469 exp.Prior, this=self._parse_bitwise() 4470 ) 4471 connect = self._parse_assignment() 4472 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4473 return connect 4474 4475 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4476 if skip_start_token: 4477 start = None 4478 elif self._match(TokenType.START_WITH): 4479 start = self._parse_assignment() 4480 else: 4481 return None 4482 4483 self._match(TokenType.CONNECT_BY) 4484 nocycle = self._match_text_seq("NOCYCLE") 4485 connect = self._parse_connect_with_prior() 4486 4487 if not start and self._match(TokenType.START_WITH): 4488 start = self._parse_assignment() 4489 4490 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4491 4492 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4493 this = self._parse_id_var(any_token=True) 4494 if self._match(TokenType.ALIAS): 4495 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4496 return this 4497 4498 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4499 if self._match_text_seq("INTERPOLATE"): 4500 return self._parse_wrapped_csv(self._parse_name_as_expression) 4501 return None 4502 4503 def _parse_order( 4504 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4505 ) -> t.Optional[exp.Expression]: 4506 siblings = None 4507 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4508 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4509 return this 4510 4511 siblings = True 4512 4513 return self.expression( 4514 exp.Order, 4515 this=this, 4516 expressions=self._parse_csv(self._parse_ordered), 4517 siblings=siblings, 4518 ) 4519 4520 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4521 if not self._match(token): 4522 return None 4523 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4524 4525 def _parse_ordered( 4526 self, parse_method: t.Optional[t.Callable] = None 4527 ) -> t.Optional[exp.Ordered]: 4528 this = parse_method() if parse_method else self._parse_assignment() 4529 if not this: 4530 return None 4531 4532 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4533 this = exp.var("ALL") 4534 4535 asc = self._match(TokenType.ASC) 4536 desc = self._match(TokenType.DESC) or (asc and False) 4537 4538 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4539 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4540 4541 nulls_first = is_nulls_first or False 4542 explicitly_null_ordered = is_nulls_first or is_nulls_last 4543 4544 if ( 4545 not explicitly_null_ordered 4546 and ( 4547 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4548 or 
(desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4549 ) 4550 and self.dialect.NULL_ORDERING != "nulls_are_last" 4551 ): 4552 nulls_first = True 4553 4554 if self._match_text_seq("WITH", "FILL"): 4555 with_fill = self.expression( 4556 exp.WithFill, 4557 **{ # type: ignore 4558 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4559 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4560 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4561 "interpolate": self._parse_interpolate(), 4562 }, 4563 ) 4564 else: 4565 with_fill = None 4566 4567 return self.expression( 4568 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4569 ) 4570 4571 def _parse_limit_options(self) -> exp.LimitOptions: 4572 percent = self._match(TokenType.PERCENT) 4573 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4574 self._match_text_seq("ONLY") 4575 with_ties = self._match_text_seq("WITH", "TIES") 4576 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4577 4578 def _parse_limit( 4579 self, 4580 this: t.Optional[exp.Expression] = None, 4581 top: bool = False, 4582 skip_limit_token: bool = False, 4583 ) -> t.Optional[exp.Expression]: 4584 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4585 comments = self._prev_comments 4586 if top: 4587 limit_paren = self._match(TokenType.L_PAREN) 4588 expression = self._parse_term() if limit_paren else self._parse_number() 4589 4590 if limit_paren: 4591 self._match_r_paren() 4592 4593 limit_options = self._parse_limit_options() 4594 else: 4595 limit_options = None 4596 expression = self._parse_term() 4597 4598 if self._match(TokenType.COMMA): 4599 offset = expression 4600 expression = self._parse_term() 4601 else: 4602 offset = None 4603 4604 limit_exp = self.expression( 4605 exp.Limit, 4606 this=this, 4607 expression=expression, 4608 offset=offset, 4609 comments=comments, 4610 limit_options=limit_options, 4611 expressions=self._parse_limit_by(), 4612 ) 4613 4614 return limit_exp 4615 4616 if self._match(TokenType.FETCH): 4617 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4618 direction = self._prev.text.upper() if direction else "FIRST" 4619 4620 count = self._parse_field(tokens=self.FETCH_TOKENS) 4621 4622 return self.expression( 4623 exp.Fetch, 4624 direction=direction, 4625 count=count, 4626 limit_options=self._parse_limit_options(), 4627 ) 4628 4629 return this 4630 4631 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4632 if not self._match(TokenType.OFFSET): 4633 return this 4634 4635 count = self._parse_term() 4636 self._match_set((TokenType.ROW, TokenType.ROWS)) 4637 4638 return self.expression( 4639 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4640 ) 4641 4642 def _can_parse_limit_or_offset(self) -> bool: 4643 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4644 return False 4645 4646 index = self._index 4647 result = bool( 4648 self._try_parse(self._parse_limit, retreat=True) 4649 or self._try_parse(self._parse_offset, retreat=True) 4650 ) 4651 self._retreat(index) 4652 return result 4653 4654 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4655 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4656 4657 def _parse_locks(self) -> t.List[exp.Lock]: 4658 locks = [] 4659 while True: 4660 if self._match_text_seq("FOR", "UPDATE"): 4661 update = True 4662 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4663 "LOCK", "IN", "SHARE", "MODE" 4664 ): 4665 update = False 4666 else: 4667 break 4668 4669 expressions = None 4670 if self._match_text_seq("OF"): 4671 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4672 4673 wait: t.Optional[bool | exp.Expression] = None 4674 if self._match_text_seq("NOWAIT"): 4675 wait = True 4676 elif self._match_text_seq("WAIT"): 4677 wait = self._parse_primary() 4678 elif self._match_text_seq("SKIP", "LOCKED"): 4679 wait = False 4680 4681 locks.append( 4682 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4683 ) 4684 4685 return locks 4686 4687 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4688 start = self._index 4689 _, side_token, kind_token = self._parse_join_parts() 4690 4691 side = side_token.text if side_token else None 4692 kind = kind_token.text if kind_token else None 4693 4694 if not self._match_set(self.SET_OPERATIONS): 4695 self._retreat(start) 4696 return None 4697 4698 token_type = self._prev.token_type 4699 4700 if token_type == TokenType.UNION: 4701 operation: t.Type[exp.SetOperation] = exp.Union 4702 elif token_type == TokenType.EXCEPT: 4703 operation = exp.Except 4704 else: 4705 operation = exp.Intersect 4706 4707 comments = self._prev.comments 4708 4709 if self._match(TokenType.DISTINCT): 4710 distinct: t.Optional[bool] = True 4711 elif self._match(TokenType.ALL): 4712 distinct = False 4713 else: 4714 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4715 if distinct is None: 4716 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4717 4718 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4719 "STRICT", "CORRESPONDING" 4720 ) 4721 if self._match_text_seq("CORRESPONDING"): 4722 by_name = True 4723 if not side and not kind: 4724 kind = "INNER" 4725 4726 on_column_list = None 4727 if by_name and self._match_texts(("ON", "BY")): 4728 on_column_list = self._parse_wrapped_csv(self._parse_column) 4729 4730 expression = self._parse_select(nested=True, parse_set_operation=False) 4731 4732 return self.expression( 4733 operation, 4734 comments=comments, 4735 this=this, 4736 distinct=distinct, 4737 by_name=by_name, 4738 expression=expression, 4739 side=side, 4740 kind=kind, 4741 on=on_column_list, 4742 ) 4743 4744 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4745 while this: 4746 setop = self.parse_set_operation(this) 4747 if not setop: 4748 break 4749 this = setop 4750 4751 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4752 expression = this.expression 4753 4754 if expression: 4755 for arg in self.SET_OP_MODIFIERS: 4756 expr = expression.args.get(arg) 4757 if expr: 4758 this.set(arg, expr.pop()) 4759 4760 return this 4761 4762 def _parse_expression(self) -> t.Optional[exp.Expression]: 4763 return self._parse_alias(self._parse_assignment()) 4764 4765 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4766 this = self._parse_disjunction() 4767 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4768 # This allows us to parse <non-identifier token> := <expr> 4769 this = exp.column( 4770 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4771 ) 4772 4773 while self._match_set(self.ASSIGNMENT): 4774 if isinstance(this, exp.Column) and len(this.parts) == 1: 4775 this = this.this 4776 4777 this = self.expression( 4778 
self.ASSIGNMENT[self._prev.token_type], 4779 this=this, 4780 comments=self._prev_comments, 4781 expression=self._parse_assignment(), 4782 ) 4783 4784 return this 4785 4786 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4787 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4788 4789 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4790 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4791 4792 def _parse_equality(self) -> t.Optional[exp.Expression]: 4793 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4794 4795 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4796 return self._parse_tokens(self._parse_range, self.COMPARISON) 4797 4798 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4799 this = this or self._parse_bitwise() 4800 negate = self._match(TokenType.NOT) 4801 4802 if self._match_set(self.RANGE_PARSERS): 4803 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4804 if not expression: 4805 return this 4806 4807 this = expression 4808 elif self._match(TokenType.ISNULL): 4809 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4810 4811 # Postgres supports ISNULL and NOTNULL for conditions. 4812 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4813 if self._match(TokenType.NOTNULL): 4814 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4815 this = self.expression(exp.Not, this=this) 4816 4817 if negate: 4818 this = self._negate_range(this) 4819 4820 if self._match(TokenType.IS): 4821 this = self._parse_is(this) 4822 4823 return this 4824 4825 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4826 if not this: 4827 return this 4828 4829 return self.expression(exp.Not, this=this) 4830 4831 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4832 index = self._index - 1 4833 negate = self._match(TokenType.NOT) 4834 4835 if self._match_text_seq("DISTINCT", "FROM"): 4836 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4837 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4838 4839 if self._match(TokenType.JSON): 4840 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4841 4842 if self._match_text_seq("WITH"): 4843 _with = True 4844 elif self._match_text_seq("WITHOUT"): 4845 _with = False 4846 else: 4847 _with = None 4848 4849 unique = self._match(TokenType.UNIQUE) 4850 self._match_text_seq("KEYS") 4851 expression: t.Optional[exp.Expression] = self.expression( 4852 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4853 ) 4854 else: 4855 expression = self._parse_primary() or self._parse_null() 4856 if not expression: 4857 self._retreat(index) 4858 return None 4859 4860 this = self.expression(exp.Is, this=this, expression=expression) 4861 return self.expression(exp.Not, this=this) if negate else this 4862 4863 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4864 unnest = self._parse_unnest(with_alias=False) 4865 if unnest: 4866 this = self.expression(exp.In, this=this, unnest=unnest) 4867 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4868 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4869 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4870 4871 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4872 this = self.expression(exp.In, 
this=this, query=expressions[0].subquery(copy=False)) 4873 else: 4874 this = self.expression(exp.In, this=this, expressions=expressions) 4875 4876 if matched_l_paren: 4877 self._match_r_paren(this) 4878 elif not self._match(TokenType.R_BRACKET, expression=this): 4879 self.raise_error("Expecting ]") 4880 else: 4881 this = self.expression(exp.In, this=this, field=self._parse_column()) 4882 4883 return this 4884 4885 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4886 low = self._parse_bitwise() 4887 self._match(TokenType.AND) 4888 high = self._parse_bitwise() 4889 return self.expression(exp.Between, this=this, low=low, high=high) 4890 4891 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4892 if not self._match(TokenType.ESCAPE): 4893 return this 4894 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4895 4896 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4897 index = self._index 4898 4899 if not self._match(TokenType.INTERVAL) and match_interval: 4900 return None 4901 4902 if self._match(TokenType.STRING, advance=False): 4903 this = self._parse_primary() 4904 else: 4905 this = self._parse_term() 4906 4907 if not this or ( 4908 isinstance(this, exp.Column) 4909 and not this.table 4910 and not this.this.quoted 4911 and this.name.upper() == "IS" 4912 ): 4913 self._retreat(index) 4914 return None 4915 4916 unit = self._parse_function() or ( 4917 not self._match(TokenType.ALIAS, advance=False) 4918 and self._parse_var(any_token=True, upper=True) 4919 ) 4920 4921 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4922 # each INTERVAL expression into this canonical form so it's easy to transpile 4923 if this and this.is_number: 4924 this = exp.Literal.string(this.to_py()) 4925 elif this and this.is_string: 4926 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4927 if parts and unit: 4928 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4929 unit = None 4930 self._retreat(self._index - 1) 4931 4932 if len(parts) == 1: 4933 this = exp.Literal.string(parts[0][0]) 4934 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4935 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4936 unit = self.expression( 4937 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4938 ) 4939 4940 interval = self.expression(exp.Interval, this=this, unit=unit) 4941 4942 index = self._index 4943 self._match(TokenType.PLUS) 4944 4945 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4946 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4947 return self.expression( 4948 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4949 ) 4950 4951 self._retreat(index) 4952 return interval 4953 4954 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4955 this = self._parse_term() 4956 4957 while True: 4958 if self._match_set(self.BITWISE): 4959 this = self.expression( 4960 self.BITWISE[self._prev.token_type], 4961 this=this, 4962 expression=self._parse_term(), 4963 ) 4964 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4965 this = self.expression( 4966 exp.DPipe, 4967 this=this, 4968 expression=self._parse_term(), 4969 safe=not self.dialect.STRICT_STRING_CONCAT, 4970 ) 4971 elif self._match(TokenType.DQMARK): 4972 this = self.expression( 4973 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4974 ) 4975 elif self._match_pair(TokenType.LT, TokenType.LT): 4976 this = self.expression( 4977 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4978 ) 4979 elif self._match_pair(TokenType.GT, TokenType.GT): 4980 this = self.expression( 4981 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4982 ) 4983 else: 4984 break 4985 4986 return this 4987 4988 def _parse_term(self) -> t.Optional[exp.Expression]: 4989 this = self._parse_factor() 4990 4991 while self._match_set(self.TERM): 4992 klass = self.TERM[self._prev.token_type] 4993 comments = self._prev_comments 4994 expression = self._parse_factor() 4995 4996 this = self.expression(klass, this=this, comments=comments, expression=expression) 4997 4998 if isinstance(this, exp.Collate): 4999 expr = this.expression 5000 5001 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5002 # fallback to Identifier / Var 5003 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5004 ident = expr.this 5005 if isinstance(ident, exp.Identifier): 5006 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5007 5008 return this 5009 5010 def _parse_factor(self) -> t.Optional[exp.Expression]: 5011 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5012 this = parse_method() 5013 5014 while self._match_set(self.FACTOR): 5015 klass = self.FACTOR[self._prev.token_type] 5016 comments = self._prev_comments 5017 expression = parse_method() 5018 5019 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5020 self._retreat(self._index - 1) 5021 return this 5022 5023 this = self.expression(klass, this=this, comments=comments, expression=expression) 5024 5025 if isinstance(this, exp.Div): 5026 this.args["typed"] = self.dialect.TYPED_DIVISION 5027 this.args["safe"] = self.dialect.SAFE_DIVISION 5028 5029 return this 5030 5031 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5032 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5033 5034 def _parse_unary(self) -> t.Optional[exp.Expression]: 5035 if self._match_set(self.UNARY_PARSERS): 5036 return self.UNARY_PARSERS[self._prev.token_type](self) 5037 return self._parse_at_time_zone(self._parse_type()) 5038 5039 def _parse_type( 5040 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5041 ) -> t.Optional[exp.Expression]: 5042 interval = parse_interval and self._parse_interval() 5043 if interval: 5044 return interval 5045 5046 index = self._index 5047 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5048 
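# Note: check_func=True avoids misreading function calls such as FOO(...) as types, and
# allow_identifiers=False prevents bare identifiers from being resolved into types at this point.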
5049 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5050 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5051 if isinstance(data_type, exp.Cast): 5052 # This constructor can contain ops directly after it, for instance struct unnesting: 5053 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5054 return self._parse_column_ops(data_type) 5055 5056 if data_type: 5057 index2 = self._index 5058 this = self._parse_primary() 5059 5060 if isinstance(this, exp.Literal): 5061 this = self._parse_column_ops(this) 5062 5063 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5064 if parser: 5065 return parser(self, this, data_type) 5066 5067 return self.expression(exp.Cast, this=this, to=data_type) 5068 5069 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5070 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5071 # 5072 # If the index difference here is greater than 1, that means the parser itself must have 5073 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5074 # 5075 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5076 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5077 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5078 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5079 # 5080 # In these cases, we don't really want to return the converted type, but instead retreat 5081 # and try to parse a Column or Identifier in the section below. 5082 if data_type.expressions and index2 - index > 1: 5083 self._retreat(index2) 5084 return self._parse_column_ops(data_type) 5085 5086 self._retreat(index) 5087 5088 if fallback_to_identifier: 5089 return self._parse_id_var() 5090 5091 this = self._parse_column() 5092 return this and self._parse_column_ops(this) 5093 5094 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5095 this = self._parse_type() 5096 if not this: 5097 return None 5098 5099 if isinstance(this, exp.Column) and not this.table: 5100 this = exp.var(this.name.upper()) 5101 5102 return self.expression( 5103 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5104 ) 5105 5106 def _parse_types( 5107 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5108 ) -> t.Optional[exp.Expression]: 5109 index = self._index 5110 5111 this: t.Optional[exp.Expression] = None 5112 prefix = self._match_text_seq("SYSUDTLIB", ".") 5113 5114 if not self._match_set(self.TYPE_TOKENS): 5115 identifier = allow_identifiers and self._parse_id_var( 5116 any_token=False, tokens=(TokenType.VAR,) 5117 ) 5118 if isinstance(identifier, exp.Identifier): 5119 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5120 5121 if len(tokens) != 1: 5122 self.raise_error("Unexpected identifier", self._prev) 5123 5124 if tokens[0].token_type in self.TYPE_TOKENS: 5125 self._prev = tokens[0] 5126 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5127 type_name = identifier.name 5128 5129 while self._match(TokenType.DOT): 5130 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5131 5132 this = exp.DataType.build(type_name, udt=True) 5133 else: 5134 self._retreat(self._index - 1) 5135 return None 5136 else: 5137 return None 5138 5139 type_token = self._prev.token_type 5140
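# The matched type token drives the rest of the parse: e.g. Materialize's MAP[TEXT => INT] or a
# parameterized DECIMAL(38, 0) both land here, with type_token holding the matched type keyword.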
5141 if type_token == TokenType.PSEUDO_TYPE: 5142 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5143 5144 if type_token == TokenType.OBJECT_IDENTIFIER: 5145 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5146 5147 # https://materialize.com/docs/sql/types/map/ 5148 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5149 key_type = self._parse_types( 5150 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5151 ) 5152 if not self._match(TokenType.FARROW): 5153 self._retreat(index) 5154 return None 5155 5156 value_type = self._parse_types( 5157 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5158 ) 5159 if not self._match(TokenType.R_BRACKET): 5160 self._retreat(index) 5161 return None 5162 5163 return exp.DataType( 5164 this=exp.DataType.Type.MAP, 5165 expressions=[key_type, value_type], 5166 nested=True, 5167 prefix=prefix, 5168 ) 5169 5170 nested = type_token in self.NESTED_TYPE_TOKENS 5171 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5172 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5173 expressions = None 5174 maybe_func = False 5175 5176 if self._match(TokenType.L_PAREN): 5177 if is_struct: 5178 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5179 elif nested: 5180 expressions = self._parse_csv( 5181 lambda: self._parse_types( 5182 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5183 ) 5184 ) 5185 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5186 this = expressions[0] 5187 this.set("nullable", True) 5188 self._match_r_paren() 5189 return this 5190 elif type_token in self.ENUM_TYPE_TOKENS: 5191 expressions = self._parse_csv(self._parse_equality) 5192 elif is_aggregate: 5193 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5194 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5195 ) 5196 if not func_or_ident: 5197 return None 5198 expressions = [func_or_ident] 5199 if self._match(TokenType.COMMA): 5200 expressions.extend( 5201 self._parse_csv( 5202 lambda: self._parse_types( 5203 check_func=check_func, 5204 schema=schema, 5205 allow_identifiers=allow_identifiers, 5206 ) 5207 ) 5208 ) 5209 else: 5210 expressions = self._parse_csv(self._parse_type_size) 5211 5212 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5213 if type_token == TokenType.VECTOR and len(expressions) == 2: 5214 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5215 5216 if not expressions or not self._match(TokenType.R_PAREN): 5217 self._retreat(index) 5218 return None 5219 5220 maybe_func = True 5221 5222 values: t.Optional[t.List[exp.Expression]] = None 5223 5224 if nested and self._match(TokenType.LT): 5225 if is_struct: 5226 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5227 else: 5228 expressions = self._parse_csv( 5229 lambda: self._parse_types( 5230 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5231 ) 5232 ) 5233 5234 if not self._match(TokenType.GT): 5235 self.raise_error("Expecting >") 5236 5237 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5238 values = self._parse_csv(self._parse_assignment) 5239 if not values and is_struct: 5240 values = None 5241 self._retreat(self._index - 1) 5242 else: 5243 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5244 5245 if type_token in self.TIMESTAMPS: 5246 if self._match_text_seq("WITH", "TIME", 
"ZONE"): 5247 maybe_func = False 5248 tz_type = ( 5249 exp.DataType.Type.TIMETZ 5250 if type_token in self.TIMES 5251 else exp.DataType.Type.TIMESTAMPTZ 5252 ) 5253 this = exp.DataType(this=tz_type, expressions=expressions) 5254 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5255 maybe_func = False 5256 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5257 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5258 maybe_func = False 5259 elif type_token == TokenType.INTERVAL: 5260 unit = self._parse_var(upper=True) 5261 if unit: 5262 if self._match_text_seq("TO"): 5263 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5264 5265 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5266 else: 5267 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5268 elif type_token == TokenType.VOID: 5269 this = exp.DataType(this=exp.DataType.Type.NULL) 5270 5271 if maybe_func and check_func: 5272 index2 = self._index 5273 peek = self._parse_string() 5274 5275 if not peek: 5276 self._retreat(index) 5277 return None 5278 5279 self._retreat(index2) 5280 5281 if not this: 5282 if self._match_text_seq("UNSIGNED"): 5283 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5284 if not unsigned_type_token: 5285 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5286 5287 type_token = unsigned_type_token or type_token 5288 5289 this = exp.DataType( 5290 this=exp.DataType.Type[type_token.value], 5291 expressions=expressions, 5292 nested=nested, 5293 prefix=prefix, 5294 ) 5295 5296 # Empty arrays/structs are allowed 5297 if values is not None: 5298 cls = exp.Struct if is_struct else exp.Array 5299 this = exp.cast(cls(expressions=values), this, copy=False) 5300 5301 elif expressions: 5302 this.set("expressions", expressions) 5303 5304 # https://materialize.com/docs/sql/types/list/#type-name 5305 while self._match(TokenType.LIST): 5306 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5307 5308 index = self._index 5309 5310 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5311 matched_array = self._match(TokenType.ARRAY) 5312 5313 while self._curr: 5314 datatype_token = self._prev.token_type 5315 matched_l_bracket = self._match(TokenType.L_BRACKET) 5316 5317 if (not matched_l_bracket and not matched_array) or ( 5318 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5319 ): 5320 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5321 # not to be confused with the fixed size array parsing 5322 break 5323 5324 matched_array = False 5325 values = self._parse_csv(self._parse_assignment) or None 5326 if ( 5327 values 5328 and not schema 5329 and ( 5330 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5331 ) 5332 ): 5333 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5334 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5335 self._retreat(index) 5336 break 5337 5338 this = exp.DataType( 5339 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5340 ) 5341 self._match(TokenType.R_BRACKET) 5342 5343 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5344 converter = self.TYPE_CONVERTERS.get(this.this) 5345 if converter: 5346 this = converter(t.cast(exp.DataType, this)) 5347 5348 return this 5349 5350 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5351 index = self._index 5352 5353 if ( 5354 self._curr 5355 and self._next 5356 and self._curr.token_type in self.TYPE_TOKENS 5357 and self._next.token_type in self.TYPE_TOKENS 5358 ): 5359 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5360 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5361 this = self._parse_id_var() 5362 else: 5363 this = ( 5364 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5365 or self._parse_id_var() 5366 ) 5367 5368 self._match(TokenType.COLON) 5369 5370 if ( 5371 type_required 5372 and not isinstance(this, exp.DataType) 5373 and not self._match_set(self.TYPE_TOKENS, advance=False) 5374 ): 5375 self._retreat(index) 5376 return self._parse_types() 5377 5378 return self._parse_column_def(this) 5379 5380 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5381 if not self._match_text_seq("AT", "TIME", "ZONE"): 5382 return this 5383 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5384 5385 def _parse_column(self) -> t.Optional[exp.Expression]: 5386 this = self._parse_column_reference() 5387 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5388 5389 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5390 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5391 5392 return column 5393 5394 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5395 this = self._parse_field() 5396 if ( 5397 not this 5398 and self._match(TokenType.VALUES, advance=False) 5399 and self.VALUES_FOLLOWED_BY_PAREN 5400 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5401 ): 5402 this = self._parse_id_var() 5403 5404 if isinstance(this, exp.Identifier): 5405 # We bubble up comments from the Identifier to the Column 5406 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5407 5408 return this 5409 5410 def _parse_colon_as_variant_extract( 5411 self, this: t.Optional[exp.Expression] 5412 ) -> t.Optional[exp.Expression]: 5413 casts = [] 5414 json_path = [] 5415 escape = None 5416 5417 while self._match(TokenType.COLON): 5418 start_index = self._index 5419 5420 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5421 path = self._parse_column_ops( 5422 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5423 ) 5424 5425 # The cast :: operator has a lower precedence than the extraction operator :, so 5426 # we rearrange the AST appropriately to avoid casting the JSON path 5427 while isinstance(path, exp.Cast): 5428 casts.append(path.to) 5429 path = path.this 5430 5431 if casts: 5432 dcolon_offset = next( 5433 i 5434 for i, t in enumerate(self._tokens[start_index:]) 5435 if t.token_type == TokenType.DCOLON 
5436 ) 5437 end_token = self._tokens[start_index + dcolon_offset - 1] 5438 else: 5439 end_token = self._prev 5440 5441 if path: 5442 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5443 # it'll roundtrip to a string literal in GET_PATH 5444 if isinstance(path, exp.Identifier) and path.quoted: 5445 escape = True 5446 5447 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5448 5449 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5450 # Databricks transforms it back to the colon/dot notation 5451 if json_path: 5452 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5453 5454 if json_path_expr: 5455 json_path_expr.set("escape", escape) 5456 5457 this = self.expression( 5458 exp.JSONExtract, 5459 this=this, 5460 expression=json_path_expr, 5461 variant_extract=True, 5462 ) 5463 5464 while casts: 5465 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5466 5467 return this 5468 5469 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5470 return self._parse_types() 5471 5472 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5473 this = self._parse_bracket(this) 5474 5475 while self._match_set(self.COLUMN_OPERATORS): 5476 op_token = self._prev.token_type 5477 op = self.COLUMN_OPERATORS.get(op_token) 5478 5479 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5480 field = self._parse_dcolon() 5481 if not field: 5482 self.raise_error("Expected type") 5483 elif op and self._curr: 5484 field = self._parse_column_reference() or self._parse_bracket() 5485 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5486 field = self._parse_column_ops(field) 5487 else: 5488 field = self._parse_field(any_token=True, anonymous_func=True) 5489 5490 if isinstance(field, (exp.Func, exp.Window)) and this: 5491 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5492 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5493 this = exp.replace_tree( 5494 this, 5495 lambda n: ( 5496 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5497 if n.table 5498 else n.this 5499 ) 5500 if isinstance(n, exp.Column) 5501 else n, 5502 ) 5503 5504 if op: 5505 this = op(self, this, field) 5506 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5507 this = self.expression( 5508 exp.Column, 5509 comments=this.comments, 5510 this=field, 5511 table=this.this, 5512 db=this.args.get("table"), 5513 catalog=this.args.get("db"), 5514 ) 5515 elif isinstance(field, exp.Window): 5516 # Move the exp.Dot's to the window's function 5517 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5518 field.set("this", window_func) 5519 this = field 5520 else: 5521 this = self.expression(exp.Dot, this=this, expression=field) 5522 5523 if field and field.comments: 5524 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5525 5526 this = self._parse_bracket(this) 5527 5528 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5529 5530 def _parse_primary(self) -> t.Optional[exp.Expression]: 5531 if self._match_set(self.PRIMARY_PARSERS): 5532 token_type = self._prev.token_type 5533 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5534 5535 if token_type == TokenType.STRING: 5536 expressions = [primary] 5537 while self._match(TokenType.STRING): 5538 expressions.append(exp.Literal.string(self._prev.text)) 5539 5540 if len(expressions) > 1: 5541 return self.expression(exp.Concat, expressions=expressions) 5542 5543 return primary 5544 5545 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5546 return exp.Literal.number(f"0.{self._prev.text}") 5547 5548 if self._match(TokenType.L_PAREN): 5549 comments = self._prev_comments 5550 query = self._parse_select() 5551 5552 if query: 5553 expressions = [query] 5554 else: 5555 expressions = self._parse_expressions() 5556 5557 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5558 5559 if not this and self._match(TokenType.R_PAREN, advance=False): 5560 this = self.expression(exp.Tuple) 5561 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5562 this = self._parse_subquery(this=this, parse_alias=False) 5563 elif isinstance(this, exp.Subquery): 5564 this = self._parse_subquery( 5565 this=self._parse_set_operations(this), parse_alias=False 5566 ) 5567 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5568 this = self.expression(exp.Tuple, expressions=expressions) 5569 else: 5570 this = self.expression(exp.Paren, this=this) 5571 5572 if this: 5573 this.add_comments(comments) 5574 5575 self._match_r_paren(expression=this) 5576 return this 5577 5578 return None 5579 5580 def _parse_field( 5581 self, 5582 any_token: bool = False, 5583 tokens: t.Optional[t.Collection[TokenType]] = None, 5584 anonymous_func: bool = False, 5585 ) -> t.Optional[exp.Expression]: 5586 if anonymous_func: 5587 field = ( 5588 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5589 or self._parse_primary() 5590 ) 5591 else: 5592 field = self._parse_primary() or self._parse_function( 5593 anonymous=anonymous_func, any_token=any_token 5594 ) 5595 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5596 5597 def _parse_function( 5598 self, 5599 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5600 anonymous: bool = False, 5601 optional_parens: 
bool = True, 5602 any_token: bool = False, 5603 ) -> t.Optional[exp.Expression]: 5604 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5605 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5606 fn_syntax = False 5607 if ( 5608 self._match(TokenType.L_BRACE, advance=False) 5609 and self._next 5610 and self._next.text.upper() == "FN" 5611 ): 5612 self._advance(2) 5613 fn_syntax = True 5614 5615 func = self._parse_function_call( 5616 functions=functions, 5617 anonymous=anonymous, 5618 optional_parens=optional_parens, 5619 any_token=any_token, 5620 ) 5621 5622 if fn_syntax: 5623 self._match(TokenType.R_BRACE) 5624 5625 return func 5626 5627 def _parse_function_call( 5628 self, 5629 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5630 anonymous: bool = False, 5631 optional_parens: bool = True, 5632 any_token: bool = False, 5633 ) -> t.Optional[exp.Expression]: 5634 if not self._curr: 5635 return None 5636 5637 comments = self._curr.comments 5638 token = self._curr 5639 token_type = self._curr.token_type 5640 this = self._curr.text 5641 upper = this.upper() 5642 5643 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5644 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5645 self._advance() 5646 return self._parse_window(parser(self)) 5647 5648 if not self._next or self._next.token_type != TokenType.L_PAREN: 5649 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5650 self._advance() 5651 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5652 5653 return None 5654 5655 if any_token: 5656 if token_type in self.RESERVED_TOKENS: 5657 return None 5658 elif token_type not in self.FUNC_TOKENS: 5659 return None 5660 5661 self._advance(2) 5662 5663 parser = self.FUNCTION_PARSERS.get(upper) 5664 if parser and not anonymous: 5665 this = parser(self) 5666 else: 5667 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5668 5669 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5670 this = self.expression( 5671 subquery_predicate, comments=comments, this=self._parse_select() 5672 ) 5673 self._match_r_paren() 5674 return this 5675 5676 if functions is None: 5677 functions = self.FUNCTIONS 5678 5679 function = functions.get(upper) 5680 known_function = function and not anonymous 5681 5682 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5683 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5684 5685 post_func_comments = self._curr and self._curr.comments 5686 if known_function and post_func_comments: 5687 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5688 # call we'll construct it as exp.Anonymous, even if it's "known" 5689 if any( 5690 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5691 for comment in post_func_comments 5692 ): 5693 known_function = False 5694 5695 if alias and known_function: 5696 args = self._kv_to_prop_eq(args) 5697 5698 if known_function: 5699 func_builder = t.cast(t.Callable, function) 5700 5701 if "dialect" in func_builder.__code__.co_varnames: 5702 func = func_builder(args, dialect=self.dialect) 5703 else: 5704 func = func_builder(args) 5705 5706 func = self.validate_expression(func, args) 5707 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5708 func.meta["name"] = this 5709 5710 this = func 5711 else: 5712 if token_type == TokenType.IDENTIFIER: 5713 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5714 this = 
self.expression(exp.Anonymous, this=this, expressions=args) 5715 5716 if isinstance(this, exp.Expression): 5717 this.add_comments(comments) 5718 5719 self._match_r_paren(this) 5720 return self._parse_window(this) 5721 5722 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5723 return expression 5724 5725 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5726 transformed = [] 5727 5728 for index, e in enumerate(expressions): 5729 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5730 if isinstance(e, exp.Alias): 5731 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5732 5733 if not isinstance(e, exp.PropertyEQ): 5734 e = self.expression( 5735 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5736 ) 5737 5738 if isinstance(e.this, exp.Column): 5739 e.this.replace(e.this.this) 5740 else: 5741 e = self._to_prop_eq(e, index) 5742 5743 transformed.append(e) 5744 5745 return transformed 5746 5747 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5748 return self._parse_statement() 5749 5750 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5751 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5752 5753 def _parse_user_defined_function( 5754 self, kind: t.Optional[TokenType] = None 5755 ) -> t.Optional[exp.Expression]: 5756 this = self._parse_table_parts(schema=True) 5757 5758 if not self._match(TokenType.L_PAREN): 5759 return this 5760 5761 expressions = self._parse_csv(self._parse_function_parameter) 5762 self._match_r_paren() 5763 return self.expression( 5764 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5765 ) 5766 5767 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5768 literal = self._parse_primary() 5769 if literal: 5770 return self.expression(exp.Introducer, this=token.text, expression=literal) 5771 5772 return self._identifier_expression(token) 5773 5774 def _parse_session_parameter(self) -> exp.SessionParameter: 5775 kind = None 5776 this = self._parse_id_var() or self._parse_primary() 5777 5778 if this and self._match(TokenType.DOT): 5779 kind = this.name 5780 this = self._parse_var() or self._parse_primary() 5781 5782 return self.expression(exp.SessionParameter, this=this, kind=kind) 5783 5784 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5785 return self._parse_id_var() 5786 5787 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5788 index = self._index 5789 5790 if self._match(TokenType.L_PAREN): 5791 expressions = t.cast( 5792 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5793 ) 5794 5795 if not self._match(TokenType.R_PAREN): 5796 self._retreat(index) 5797 else: 5798 expressions = [self._parse_lambda_arg()] 5799 5800 if self._match_set(self.LAMBDAS): 5801 return self.LAMBDAS[self._prev.token_type](self, expressions) 5802 5803 self._retreat(index) 5804 5805 this: t.Optional[exp.Expression] 5806 5807 if self._match(TokenType.DISTINCT): 5808 this = self.expression( 5809 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5810 ) 5811 else: 5812 this = self._parse_select_or_expression(alias=alias) 5813 5814 return self._parse_limit( 5815 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5816 ) 5817 5818 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5819 index = 
self._index 5820 if not self._match(TokenType.L_PAREN): 5821 return this 5822 5823 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5824 # expr can be of both types 5825 if self._match_set(self.SELECT_START_TOKENS): 5826 self._retreat(index) 5827 return this 5828 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5829 self._match_r_paren() 5830 return self.expression(exp.Schema, this=this, expressions=args) 5831 5832 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5833 return self._parse_column_def(self._parse_field(any_token=True)) 5834 5835 def _parse_column_def( 5836 self, this: t.Optional[exp.Expression], computed_column: bool = True 5837 ) -> t.Optional[exp.Expression]: 5838 # column defs are not really columns, they're identifiers 5839 if isinstance(this, exp.Column): 5840 this = this.this 5841 5842 if not computed_column: 5843 self._match(TokenType.ALIAS) 5844 5845 kind = self._parse_types(schema=True) 5846 5847 if self._match_text_seq("FOR", "ORDINALITY"): 5848 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5849 5850 constraints: t.List[exp.Expression] = [] 5851 5852 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5853 ("ALIAS", "MATERIALIZED") 5854 ): 5855 persisted = self._prev.text.upper() == "MATERIALIZED" 5856 constraint_kind = exp.ComputedColumnConstraint( 5857 this=self._parse_assignment(), 5858 persisted=persisted or self._match_text_seq("PERSISTED"), 5859 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5860 ) 5861 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5862 elif ( 5863 kind 5864 and self._match(TokenType.ALIAS, advance=False) 5865 and ( 5866 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5867 or (self._next and self._next.token_type == TokenType.L_PAREN) 5868 ) 5869 ): 5870 self._advance() 5871 constraints.append( 5872 self.expression( 5873 exp.ColumnConstraint, 5874 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5875 ) 5876 ) 5877 5878 while True: 5879 constraint = self._parse_column_constraint() 5880 if not constraint: 5881 break 5882 constraints.append(constraint) 5883 5884 if not kind and not constraints: 5885 return this 5886 5887 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5888 5889 def _parse_auto_increment( 5890 self, 5891 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5892 start = None 5893 increment = None 5894 5895 if self._match(TokenType.L_PAREN, advance=False): 5896 args = self._parse_wrapped_csv(self._parse_bitwise) 5897 start = seq_get(args, 0) 5898 increment = seq_get(args, 1) 5899 elif self._match_text_seq("START"): 5900 start = self._parse_bitwise() 5901 self._match_text_seq("INCREMENT") 5902 increment = self._parse_bitwise() 5903 5904 if start and increment: 5905 return exp.GeneratedAsIdentityColumnConstraint( 5906 start=start, increment=increment, this=False 5907 ) 5908 5909 return exp.AutoIncrementColumnConstraint() 5910 5911 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5912 if not self._match_text_seq("REFRESH"): 5913 self._retreat(self._index - 1) 5914 return None 5915 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5916 5917 def _parse_compress(self) -> exp.CompressColumnConstraint: 5918 if self._match(TokenType.L_PAREN, advance=False): 5919 return self.expression( 5920 exp.CompressColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_bitwise) 5921 ) 5922 5923 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5924 5925 def _parse_generated_as_identity( 5926 self, 5927 ) -> ( 5928 exp.GeneratedAsIdentityColumnConstraint 5929 | exp.ComputedColumnConstraint 5930 | exp.GeneratedAsRowColumnConstraint 5931 ): 5932 if self._match_text_seq("BY", "DEFAULT"): 5933 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5934 this = self.expression( 5935 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5936 ) 5937 else: 5938 self._match_text_seq("ALWAYS") 5939 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5940 5941 self._match(TokenType.ALIAS) 5942 5943 if self._match_text_seq("ROW"): 5944 start = self._match_text_seq("START") 5945 if not start: 5946 self._match(TokenType.END) 5947 hidden = self._match_text_seq("HIDDEN") 5948 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5949 5950 identity = self._match_text_seq("IDENTITY") 5951 5952 if self._match(TokenType.L_PAREN): 5953 if self._match(TokenType.START_WITH): 5954 this.set("start", self._parse_bitwise()) 5955 if self._match_text_seq("INCREMENT", "BY"): 5956 this.set("increment", self._parse_bitwise()) 5957 if self._match_text_seq("MINVALUE"): 5958 this.set("minvalue", self._parse_bitwise()) 5959 if self._match_text_seq("MAXVALUE"): 5960 this.set("maxvalue", self._parse_bitwise()) 5961 5962 if self._match_text_seq("CYCLE"): 5963 this.set("cycle", True) 5964 elif self._match_text_seq("NO", "CYCLE"): 5965 this.set("cycle", False) 5966 5967 if not identity: 5968 this.set("expression", self._parse_range()) 5969 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5970 args = self._parse_csv(self._parse_bitwise) 5971 this.set("start", seq_get(args, 0)) 5972 this.set("increment", seq_get(args, 1)) 5973 5974 self._match_r_paren() 5975 5976 return this 5977 5978 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5979 self._match_text_seq("LENGTH") 5980 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5981 5982 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5983 if self._match_text_seq("NULL"): 5984 return self.expression(exp.NotNullColumnConstraint) 5985 if self._match_text_seq("CASESPECIFIC"): 5986 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5987 if self._match_text_seq("FOR", "REPLICATION"): 5988 return self.expression(exp.NotForReplicationColumnConstraint) 5989 5990 # Unconsume the `NOT` token 5991 self._retreat(self._index - 1) 5992 return None 5993 5994 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5995 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5996 5997 procedure_option_follows = ( 5998 self._match(TokenType.WITH, advance=False) 5999 and self._next 6000 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6001 ) 6002 6003 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6004 return self.expression( 6005 exp.ColumnConstraint, 6006 this=this, 6007 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6008 ) 6009 6010 return this 6011 6012 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6013 if not self._match(TokenType.CONSTRAINT): 6014 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6015 6016 return self.expression( 6017 exp.Constraint, 6018 this=self._parse_id_var(), 6019 
expressions=self._parse_unnamed_constraints(), 6020 ) 6021 6022 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6023 constraints = [] 6024 while True: 6025 constraint = self._parse_unnamed_constraint() or self._parse_function() 6026 if not constraint: 6027 break 6028 constraints.append(constraint) 6029 6030 return constraints 6031 6032 def _parse_unnamed_constraint( 6033 self, constraints: t.Optional[t.Collection[str]] = None 6034 ) -> t.Optional[exp.Expression]: 6035 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6036 constraints or self.CONSTRAINT_PARSERS 6037 ): 6038 return None 6039 6040 constraint = self._prev.text.upper() 6041 if constraint not in self.CONSTRAINT_PARSERS: 6042 self.raise_error(f"No parser found for schema constraint {constraint}.") 6043 6044 return self.CONSTRAINT_PARSERS[constraint](self) 6045 6046 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6047 return self._parse_id_var(any_token=False) 6048 6049 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6050 self._match_text_seq("KEY") 6051 return self.expression( 6052 exp.UniqueColumnConstraint, 6053 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6054 this=self._parse_schema(self._parse_unique_key()), 6055 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6056 on_conflict=self._parse_on_conflict(), 6057 options=self._parse_key_constraint_options(), 6058 ) 6059 6060 def _parse_key_constraint_options(self) -> t.List[str]: 6061 options = [] 6062 while True: 6063 if not self._curr: 6064 break 6065 6066 if self._match(TokenType.ON): 6067 action = None 6068 on = self._advance_any() and self._prev.text 6069 6070 if self._match_text_seq("NO", "ACTION"): 6071 action = "NO ACTION" 6072 elif self._match_text_seq("CASCADE"): 6073 action = "CASCADE" 6074 elif self._match_text_seq("RESTRICT"): 6075 action = "RESTRICT" 6076 elif self._match_pair(TokenType.SET, TokenType.NULL): 6077 action = "SET NULL" 6078 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6079 action = "SET DEFAULT" 6080 else: 6081 self.raise_error("Invalid key constraint") 6082 6083 options.append(f"ON {on} {action}") 6084 else: 6085 var = self._parse_var_from_options( 6086 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6087 ) 6088 if not var: 6089 break 6090 options.append(var.name) 6091 6092 return options 6093 6094 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6095 if match and not self._match(TokenType.REFERENCES): 6096 return None 6097 6098 expressions = None 6099 this = self._parse_table(schema=True) 6100 options = self._parse_key_constraint_options() 6101 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6102 6103 def _parse_foreign_key(self) -> exp.ForeignKey: 6104 expressions = ( 6105 self._parse_wrapped_id_vars() 6106 if not self._match(TokenType.REFERENCES, advance=False) 6107 else None 6108 ) 6109 reference = self._parse_references() 6110 on_options = {} 6111 6112 while self._match(TokenType.ON): 6113 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6114 self.raise_error("Expected DELETE or UPDATE") 6115 6116 kind = self._prev.text.lower() 6117 6118 if self._match_text_seq("NO", "ACTION"): 6119 action = "NO ACTION" 6120 elif self._match(TokenType.SET): 6121 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6122 action = "SET " + self._prev.text.upper() 6123 else: 6124 self._advance() 6125 action = self._prev.text.upper() 6126 6127 on_options[kind] = 
action 6128 6129 return self.expression( 6130 exp.ForeignKey, 6131 expressions=expressions, 6132 reference=reference, 6133 options=self._parse_key_constraint_options(), 6134 **on_options, # type: ignore 6135 ) 6136 6137 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6138 return self._parse_ordered() or self._parse_field() 6139 6140 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6141 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6142 self._retreat(self._index - 1) 6143 return None 6144 6145 id_vars = self._parse_wrapped_id_vars() 6146 return self.expression( 6147 exp.PeriodForSystemTimeConstraint, 6148 this=seq_get(id_vars, 0), 6149 expression=seq_get(id_vars, 1), 6150 ) 6151 6152 def _parse_primary_key( 6153 self, wrapped_optional: bool = False, in_props: bool = False 6154 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6155 desc = ( 6156 self._match_set((TokenType.ASC, TokenType.DESC)) 6157 and self._prev.token_type == TokenType.DESC 6158 ) 6159 6160 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6161 return self.expression( 6162 exp.PrimaryKeyColumnConstraint, 6163 desc=desc, 6164 options=self._parse_key_constraint_options(), 6165 ) 6166 6167 expressions = self._parse_wrapped_csv( 6168 self._parse_primary_key_part, optional=wrapped_optional 6169 ) 6170 options = self._parse_key_constraint_options() 6171 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6172 6173 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6174 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6175 6176 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6177 """ 6178 Parses a datetime column in ODBC format. We parse the column into the corresponding 6179 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6180 same as we did for `DATE('yyyy-mm-dd')`. 
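        Likewise, `{t'hh:mm:ss'}` and `{ts'yyyy-mm-dd hh:mm:ss'}` are parsed into their
        corresponding expression types, as determined by `ODBC_DATETIME_LITERALS`.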
6181 6182 Reference: 6183 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6184 """ 6185 self._match(TokenType.VAR) 6186 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6187 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6188 if not self._match(TokenType.R_BRACE): 6189 self.raise_error("Expected }") 6190 return expression 6191 6192 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6193 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6194 return this 6195 6196 bracket_kind = self._prev.token_type 6197 if ( 6198 bracket_kind == TokenType.L_BRACE 6199 and self._curr 6200 and self._curr.token_type == TokenType.VAR 6201 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6202 ): 6203 return self._parse_odbc_datetime_literal() 6204 6205 expressions = self._parse_csv( 6206 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6207 ) 6208 6209 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6210 self.raise_error("Expected ]") 6211 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6212 self.raise_error("Expected }") 6213 6214 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6215 if bracket_kind == TokenType.L_BRACE: 6216 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6217 elif not this: 6218 this = build_array_constructor( 6219 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6220 ) 6221 else: 6222 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6223 if constructor_type: 6224 return build_array_constructor( 6225 constructor_type, 6226 args=expressions, 6227 bracket_kind=bracket_kind, 6228 dialect=self.dialect, 6229 ) 6230 6231 expressions = apply_index_offset( 6232 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6233 ) 6234 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6235 6236 self._add_comments(this) 6237 return self._parse_bracket(this) 6238 6239 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6240 if self._match(TokenType.COLON): 6241 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6242 return this 6243 6244 def _parse_case(self) -> t.Optional[exp.Expression]: 6245 ifs = [] 6246 default = None 6247 6248 comments = self._prev_comments 6249 expression = self._parse_assignment() 6250 6251 while self._match(TokenType.WHEN): 6252 this = self._parse_assignment() 6253 self._match(TokenType.THEN) 6254 then = self._parse_assignment() 6255 ifs.append(self.expression(exp.If, this=this, true=then)) 6256 6257 if self._match(TokenType.ELSE): 6258 default = self._parse_assignment() 6259 6260 if not self._match(TokenType.END): 6261 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6262 default = exp.column("interval") 6263 else: 6264 self.raise_error("Expected END after CASE", self._prev) 6265 6266 return self.expression( 6267 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6268 ) 6269 6270 def _parse_if(self) -> t.Optional[exp.Expression]: 6271 if self._match(TokenType.L_PAREN): 6272 args = self._parse_csv( 6273 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6274 ) 6275 this = self.validate_expression(exp.If.from_arg_list(args), args) 6276 self._match_r_paren() 6277 
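        # No-paren form, e.g. IF cond THEN x ELSE y END; like the IF(cond, x, y) call
        # form above, it is parsed into a single exp.If node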
else: 6278 index = self._index - 1 6279 6280 if self.NO_PAREN_IF_COMMANDS and index == 0: 6281 return self._parse_as_command(self._prev) 6282 6283 condition = self._parse_assignment() 6284 6285 if not condition: 6286 self._retreat(index) 6287 return None 6288 6289 self._match(TokenType.THEN) 6290 true = self._parse_assignment() 6291 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6292 self._match(TokenType.END) 6293 this = self.expression(exp.If, this=condition, true=true, false=false) 6294 6295 return this 6296 6297 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6298 if not self._match_text_seq("VALUE", "FOR"): 6299 self._retreat(self._index - 1) 6300 return None 6301 6302 return self.expression( 6303 exp.NextValueFor, 6304 this=self._parse_column(), 6305 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6306 ) 6307 6308 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6309 this = self._parse_function() or self._parse_var_or_string(upper=True) 6310 6311 if self._match(TokenType.FROM): 6312 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6313 6314 if not self._match(TokenType.COMMA): 6315 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6316 6317 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6318 6319 def _parse_gap_fill(self) -> exp.GapFill: 6320 self._match(TokenType.TABLE) 6321 this = self._parse_table() 6322 6323 self._match(TokenType.COMMA) 6324 args = [this, *self._parse_csv(self._parse_lambda)] 6325 6326 gap_fill = exp.GapFill.from_arg_list(args) 6327 return self.validate_expression(gap_fill, args) 6328 6329 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6330 this = self._parse_assignment() 6331 6332 if not self._match(TokenType.ALIAS): 6333 if self._match(TokenType.COMMA): 6334 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6335 6336 self.raise_error("Expected AS after CAST") 6337 6338 fmt = None 6339 to = self._parse_types() 6340 6341 default = self._match(TokenType.DEFAULT) 6342 if default: 6343 default = self._parse_bitwise() 6344 self._match_text_seq("ON", "CONVERSION", "ERROR") 6345 6346 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6347 fmt_string = self._parse_string() 6348 fmt = self._parse_at_time_zone(fmt_string) 6349 6350 if not to: 6351 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6352 if to.this in exp.DataType.TEMPORAL_TYPES: 6353 this = self.expression( 6354 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6355 this=this, 6356 format=exp.Literal.string( 6357 format_time( 6358 fmt_string.this if fmt_string else "", 6359 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6360 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6361 ) 6362 ), 6363 safe=safe, 6364 ) 6365 6366 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6367 this.set("zone", fmt.args["zone"]) 6368 return this 6369 elif not to: 6370 self.raise_error("Expected TYPE after CAST") 6371 elif isinstance(to, exp.Identifier): 6372 to = exp.DataType.build(to.name, udt=True) 6373 elif to.this == exp.DataType.Type.CHAR: 6374 if self._match(TokenType.CHARACTER_SET): 6375 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6376 6377 return self.expression( 6378 exp.Cast if strict else exp.TryCast, 6379 this=this, 6380 to=to, 6381 format=fmt, 6382 safe=safe, 6383 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6384 default=default, 6385 ) 6386 6387 def _parse_string_agg(self) -> exp.GroupConcat: 6388 if self._match(TokenType.DISTINCT): 6389 args: t.List[t.Optional[exp.Expression]] = [ 6390 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6391 ] 6392 if self._match(TokenType.COMMA): 6393 args.extend(self._parse_csv(self._parse_assignment)) 6394 else: 6395 args = self._parse_csv(self._parse_assignment) # type: ignore 6396 6397 if self._match_text_seq("ON", "OVERFLOW"): 6398 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6399 if self._match_text_seq("ERROR"): 6400 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6401 else: 6402 self._match_text_seq("TRUNCATE") 6403 on_overflow = self.expression( 6404 exp.OverflowTruncateBehavior, 6405 this=self._parse_string(), 6406 with_count=( 6407 self._match_text_seq("WITH", "COUNT") 6408 or not self._match_text_seq("WITHOUT", "COUNT") 6409 ), 6410 ) 6411 else: 6412 on_overflow = None 6413 6414 index = self._index 6415 if not self._match(TokenType.R_PAREN) and args: 6416 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6417 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6418 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6419 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6420 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6421 6422 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6423 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6424 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
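        # At this point a trailing WITHIN GROUP (ORDER BY ...) may still follow, e.g.
        # Trino's LISTAGG(x, ',') WITHIN GROUP (ORDER BY y); it canonicalizes to roughly
        # the same GroupConcat shape as Postgres' STRING_AGG(x, ',' ORDER BY y), with the
        # ORDER BY stored under `this`, so transpiling between the dialects stays symmetric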
6425 if not self._match_text_seq("WITHIN", "GROUP"): 6426 self._retreat(index) 6427 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6428 6429 # The corresponding match_r_paren will be called in parse_function (caller) 6430 self._match_l_paren() 6431 6432 return self.expression( 6433 exp.GroupConcat, 6434 this=self._parse_order(this=seq_get(args, 0)), 6435 separator=seq_get(args, 1), 6436 on_overflow=on_overflow, 6437 ) 6438 6439 def _parse_convert( 6440 self, strict: bool, safe: t.Optional[bool] = None 6441 ) -> t.Optional[exp.Expression]: 6442 this = self._parse_bitwise() 6443 6444 if self._match(TokenType.USING): 6445 to: t.Optional[exp.Expression] = self.expression( 6446 exp.CharacterSet, this=self._parse_var() 6447 ) 6448 elif self._match(TokenType.COMMA): 6449 to = self._parse_types() 6450 else: 6451 to = None 6452 6453 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6454 6455 def _parse_xml_table(self) -> exp.XMLTable: 6456 namespaces = None 6457 passing = None 6458 columns = None 6459 6460 if self._match_text_seq("XMLNAMESPACES", "("): 6461 namespaces = self._parse_xml_namespace() 6462 self._match_text_seq(")", ",") 6463 6464 this = self._parse_string() 6465 6466 if self._match_text_seq("PASSING"): 6467 # The BY VALUE keywords are optional and are provided for semantic clarity 6468 self._match_text_seq("BY", "VALUE") 6469 passing = self._parse_csv(self._parse_column) 6470 6471 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6472 6473 if self._match_text_seq("COLUMNS"): 6474 columns = self._parse_csv(self._parse_field_def) 6475 6476 return self.expression( 6477 exp.XMLTable, 6478 this=this, 6479 namespaces=namespaces, 6480 passing=passing, 6481 columns=columns, 6482 by_ref=by_ref, 6483 ) 6484 6485 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6486 namespaces = [] 6487 6488 while True: 6489 if self._match(TokenType.DEFAULT): 6490 uri = self._parse_string() 6491 else: 6492 uri = self._parse_alias(self._parse_string()) 6493 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6494 if not self._match(TokenType.COMMA): 6495 break 6496 6497 return namespaces 6498 6499 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6500 """ 6501 There are generally two variants of the DECODE function: 6502 6503 - DECODE(bin, charset) 6504 - DECODE(expression, search, result [, search, result] ... [, default]) 6505 6506 The second variant will always be parsed into a CASE expression. Note that NULL 6507 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6508 instead of relying on pattern matching. 
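        For example, `DECODE(x, 1, 'a', NULL, 'b', 'c')` roughly becomes
        `CASE WHEN x = 1 THEN 'a' WHEN x IS NULL THEN 'b' ELSE 'c' END`.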
6509 """ 6510 args = self._parse_csv(self._parse_assignment) 6511 6512 if len(args) < 3: 6513 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6514 6515 expression, *expressions = args 6516 if not expression: 6517 return None 6518 6519 ifs = [] 6520 for search, result in zip(expressions[::2], expressions[1::2]): 6521 if not search or not result: 6522 return None 6523 6524 if isinstance(search, exp.Literal): 6525 ifs.append( 6526 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6527 ) 6528 elif isinstance(search, exp.Null): 6529 ifs.append( 6530 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6531 ) 6532 else: 6533 cond = exp.or_( 6534 exp.EQ(this=expression.copy(), expression=search), 6535 exp.and_( 6536 exp.Is(this=expression.copy(), expression=exp.Null()), 6537 exp.Is(this=search.copy(), expression=exp.Null()), 6538 copy=False, 6539 ), 6540 copy=False, 6541 ) 6542 ifs.append(exp.If(this=cond, true=result)) 6543 6544 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6545 6546 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6547 self._match_text_seq("KEY") 6548 key = self._parse_column() 6549 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6550 self._match_text_seq("VALUE") 6551 value = self._parse_bitwise() 6552 6553 if not key and not value: 6554 return None 6555 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6556 6557 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6558 if not this or not self._match_text_seq("FORMAT", "JSON"): 6559 return this 6560 6561 return self.expression(exp.FormatJson, this=this) 6562 6563 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6564 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6565 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6566 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6567 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6568 else: 6569 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6570 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6571 6572 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6573 6574 if not empty and not error and not null: 6575 return None 6576 6577 return self.expression( 6578 exp.OnCondition, 6579 empty=empty, 6580 error=error, 6581 null=null, 6582 ) 6583 6584 def _parse_on_handling( 6585 self, on: str, *values: str 6586 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6587 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6588 for value in values: 6589 if self._match_text_seq(value, "ON", on): 6590 return f"{value} ON {on}" 6591 6592 index = self._index 6593 if self._match(TokenType.DEFAULT): 6594 default_value = self._parse_bitwise() 6595 if self._match_text_seq("ON", on): 6596 return default_value 6597 6598 self._retreat(index) 6599 6600 return None 6601 6602 @t.overload 6603 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6604 6605 @t.overload 6606 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6607 6608 def _parse_json_object(self, agg=False): 6609 star = self._parse_star() 6610 expressions = ( 6611 [star] 6612 if star 6613 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6614 ) 6615 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6616 6617 unique_keys = None 6618 if self._match_text_seq("WITH", "UNIQUE"): 6619 unique_keys = True 6620 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6621 unique_keys = False 6622 6623 self._match_text_seq("KEYS") 6624 6625 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6626 self._parse_type() 6627 ) 6628 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6629 6630 return self.expression( 6631 exp.JSONObjectAgg if agg else exp.JSONObject, 6632 expressions=expressions, 6633 null_handling=null_handling, 6634 unique_keys=unique_keys, 6635 return_type=return_type, 6636 encoding=encoding, 6637 ) 6638 6639 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6640 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6641 if not self._match_text_seq("NESTED"): 6642 this = self._parse_id_var() 6643 kind = self._parse_types(allow_identifiers=False) 6644 nested = None 6645 else: 6646 this = None 6647 kind = None 6648 nested = True 6649 6650 path = self._match_text_seq("PATH") and self._parse_string() 6651 nested_schema = nested and self._parse_json_schema() 6652 6653 return self.expression( 6654 exp.JSONColumnDef, 6655 this=this, 6656 kind=kind, 6657 path=path, 6658 nested_schema=nested_schema, 6659 ) 6660 6661 def _parse_json_schema(self) -> exp.JSONSchema: 6662 self._match_text_seq("COLUMNS") 6663 return self.expression( 6664 exp.JSONSchema, 6665 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6666 ) 6667 6668 def _parse_json_table(self) -> exp.JSONTable: 6669 this = self._parse_format_json(self._parse_bitwise()) 6670 path = self._match(TokenType.COMMA) and self._parse_string() 6671 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6672 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6673 schema = self._parse_json_schema() 6674 6675 return exp.JSONTable( 6676 this=this, 6677 schema=schema, 6678 path=path, 6679 error_handling=error_handling, 6680 empty_handling=empty_handling, 6681 ) 6682 6683 def _parse_match_against(self) -> exp.MatchAgainst: 6684 expressions = self._parse_csv(self._parse_column) 6685 6686 self._match_text_seq(")", "AGAINST", "(") 6687 6688 this = self._parse_string() 6689 6690 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6691 modifier = "IN NATURAL LANGUAGE MODE" 6692 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6693 modifier = f"{modifier} WITH QUERY EXPANSION" 6694 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6695 modifier = "IN BOOLEAN MODE" 6696 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6697 modifier = "WITH QUERY EXPANSION" 6698 else: 6699 modifier = None 6700 6701 return self.expression( 6702 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6703 ) 6704 6705 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6706 def _parse_open_json(self) -> exp.OpenJSON: 6707 this = self._parse_bitwise() 6708 path = self._match(TokenType.COMMA) and self._parse_string() 6709 6710 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6711 this = self._parse_field(any_token=True) 6712 kind = self._parse_types() 6713 path = 
self._parse_string() 6714 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6715 6716 return self.expression( 6717 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6718 ) 6719 6720 expressions = None 6721 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6722 self._match_l_paren() 6723 expressions = self._parse_csv(_parse_open_json_column_def) 6724 6725 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6726 6727 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6728 args = self._parse_csv(self._parse_bitwise) 6729 6730 if self._match(TokenType.IN): 6731 return self.expression( 6732 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6733 ) 6734 6735 if haystack_first: 6736 haystack = seq_get(args, 0) 6737 needle = seq_get(args, 1) 6738 else: 6739 haystack = seq_get(args, 1) 6740 needle = seq_get(args, 0) 6741 6742 return self.expression( 6743 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6744 ) 6745 6746 def _parse_predict(self) -> exp.Predict: 6747 self._match_text_seq("MODEL") 6748 this = self._parse_table() 6749 6750 self._match(TokenType.COMMA) 6751 self._match_text_seq("TABLE") 6752 6753 return self.expression( 6754 exp.Predict, 6755 this=this, 6756 expression=self._parse_table(), 6757 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6758 ) 6759 6760 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6761 args = self._parse_csv(self._parse_table) 6762 return exp.JoinHint(this=func_name.upper(), expressions=args) 6763 6764 def _parse_substring(self) -> exp.Substring: 6765 # Postgres supports the form: substring(string [from int] [for int]) 6766 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6767 6768 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6769 6770 if self._match(TokenType.FROM): 6771 args.append(self._parse_bitwise()) 6772 if self._match(TokenType.FOR): 6773 if len(args) == 1: 6774 args.append(exp.Literal.number(1)) 6775 args.append(self._parse_bitwise()) 6776 6777 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6778 6779 def _parse_trim(self) -> exp.Trim: 6780 # https://www.w3resource.com/sql/character-functions/trim.php 6781 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6782 6783 position = None 6784 collation = None 6785 expression = None 6786 6787 if self._match_texts(self.TRIM_TYPES): 6788 position = self._prev.text.upper() 6789 6790 this = self._parse_bitwise() 6791 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6792 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6793 expression = self._parse_bitwise() 6794 6795 if invert_order: 6796 this, expression = expression, this 6797 6798 if self._match(TokenType.COLLATE): 6799 collation = self._parse_bitwise() 6800 6801 return self.expression( 6802 exp.Trim, this=this, position=position, expression=expression, collation=collation 6803 ) 6804 6805 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6806 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6807 6808 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6809 return self._parse_window(self._parse_id_var(), alias=True) 6810 6811 def _parse_respect_or_ignore_nulls( 6812 self, this: t.Optional[exp.Expression] 6813 ) -> t.Optional[exp.Expression]: 6814 if self._match_text_seq("IGNORE", "NULLS"): 
6815 return self.expression(exp.IgnoreNulls, this=this) 6816 if self._match_text_seq("RESPECT", "NULLS"): 6817 return self.expression(exp.RespectNulls, this=this) 6818 return this 6819 6820 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6821 if self._match(TokenType.HAVING): 6822 self._match_texts(("MAX", "MIN")) 6823 max = self._prev.text.upper() != "MIN" 6824 return self.expression( 6825 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6826 ) 6827 6828 return this 6829 6830 def _parse_window( 6831 self, this: t.Optional[exp.Expression], alias: bool = False 6832 ) -> t.Optional[exp.Expression]: 6833 func = this 6834 comments = func.comments if isinstance(func, exp.Expression) else None 6835 6836 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6837 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6838 if self._match_text_seq("WITHIN", "GROUP"): 6839 order = self._parse_wrapped(self._parse_order) 6840 this = self.expression(exp.WithinGroup, this=this, expression=order) 6841 6842 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6843 self._match(TokenType.WHERE) 6844 this = self.expression( 6845 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6846 ) 6847 self._match_r_paren() 6848 6849 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6850 # Some dialects choose to implement and some do not. 6851 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6852 6853 # There is some code above in _parse_lambda that handles 6854 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6855 6856 # The below changes handle 6857 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6858 6859 # Oracle allows both formats 6860 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6861 # and Snowflake chose to do the same for familiarity 6862 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6863 if isinstance(this, exp.AggFunc): 6864 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6865 6866 if ignore_respect and ignore_respect is not this: 6867 ignore_respect.replace(ignore_respect.this) 6868 this = self.expression(ignore_respect.__class__, this=this) 6869 6870 this = self._parse_respect_or_ignore_nulls(this) 6871 6872 # bigquery select from window x AS (partition by ...) 
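        # e.g. FIRST_VALUE(x IGNORE NULLS) is hoisted into IgnoreNulls(this=FIRST_VALUE(x)),
        # the same tree produced by the postfix FIRST_VALUE(x) IGNORE NULLS form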
6873 if alias: 6874 over = None 6875 self._match(TokenType.ALIAS) 6876 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6877 return this 6878 else: 6879 over = self._prev.text.upper() 6880 6881 if comments and isinstance(func, exp.Expression): 6882 func.pop_comments() 6883 6884 if not self._match(TokenType.L_PAREN): 6885 return self.expression( 6886 exp.Window, 6887 comments=comments, 6888 this=this, 6889 alias=self._parse_id_var(False), 6890 over=over, 6891 ) 6892 6893 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6894 6895 first = self._match(TokenType.FIRST) 6896 if self._match_text_seq("LAST"): 6897 first = False 6898 6899 partition, order = self._parse_partition_and_order() 6900 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6901 6902 if kind: 6903 self._match(TokenType.BETWEEN) 6904 start = self._parse_window_spec() 6905 self._match(TokenType.AND) 6906 end = self._parse_window_spec() 6907 exclude = ( 6908 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6909 if self._match_text_seq("EXCLUDE") 6910 else None 6911 ) 6912 6913 spec = self.expression( 6914 exp.WindowSpec, 6915 kind=kind, 6916 start=start["value"], 6917 start_side=start["side"], 6918 end=end["value"], 6919 end_side=end["side"], 6920 exclude=exclude, 6921 ) 6922 else: 6923 spec = None 6924 6925 self._match_r_paren() 6926 6927 window = self.expression( 6928 exp.Window, 6929 comments=comments, 6930 this=this, 6931 partition_by=partition, 6932 order=order, 6933 spec=spec, 6934 alias=window_alias, 6935 over=over, 6936 first=first, 6937 ) 6938 6939 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6940 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6941 return self._parse_window(window, alias=alias) 6942 6943 return window 6944 6945 def _parse_partition_and_order( 6946 self, 6947 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6948 return self._parse_partition_by(), self._parse_order() 6949 6950 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6951 self._match(TokenType.BETWEEN) 6952 6953 return { 6954 "value": ( 6955 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6956 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6957 or self._parse_bitwise() 6958 ), 6959 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6960 } 6961 6962 def _parse_alias( 6963 self, this: t.Optional[exp.Expression], explicit: bool = False 6964 ) -> t.Optional[exp.Expression]: 6965 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6966 # so this section tries to parse the clause version and if it fails, it treats the token 6967 # as an identifier (alias) 6968 if self._can_parse_limit_or_offset(): 6969 return this 6970 6971 any_token = self._match(TokenType.ALIAS) 6972 comments = self._prev_comments or [] 6973 6974 if explicit and not any_token: 6975 return this 6976 6977 if self._match(TokenType.L_PAREN): 6978 aliases = self.expression( 6979 exp.Aliases, 6980 comments=comments, 6981 this=this, 6982 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6983 ) 6984 self._match_r_paren(aliases) 6985 return aliases 6986 6987 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6988 self.STRING_ALIASES and self._parse_string_as_identifier() 6989 ) 6990 6991 if alias: 6992 comments.extend(alias.pop_comments()) 6993 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 6994 column = this.this 6995 6996 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6997 if not this.comments and column and column.comments: 6998 this.comments = column.pop_comments() 6999 7000 return this 7001 7002 def _parse_id_var( 7003 self, 7004 any_token: bool = True, 7005 tokens: t.Optional[t.Collection[TokenType]] = None, 7006 ) -> t.Optional[exp.Expression]: 7007 expression = self._parse_identifier() 7008 if not expression and ( 7009 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7010 ): 7011 quoted = self._prev.token_type == TokenType.STRING 7012 expression = self._identifier_expression(quoted=quoted) 7013 7014 return expression 7015 7016 def _parse_string(self) -> t.Optional[exp.Expression]: 7017 if self._match_set(self.STRING_PARSERS): 7018 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7019 return self._parse_placeholder() 7020 7021 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7022 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7023 if output: 7024 output.update_positions(self._prev) 7025 return output 7026 7027 def _parse_number(self) -> t.Optional[exp.Expression]: 7028 if self._match_set(self.NUMERIC_PARSERS): 7029 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7030 return self._parse_placeholder() 7031 7032 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7033 if self._match(TokenType.IDENTIFIER): 7034 return self._identifier_expression(quoted=True) 7035 return self._parse_placeholder() 7036 7037 def _parse_var( 7038 self, 7039 any_token: bool = False, 7040 tokens: t.Optional[t.Collection[TokenType]] = None, 7041 upper: bool = False, 7042 ) -> t.Optional[exp.Expression]: 7043 if ( 7044 (any_token and self._advance_any()) 7045 or self._match(TokenType.VAR) 7046 or (self._match_set(tokens) if tokens else False) 7047 ): 7048 return self.expression( 7049 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7050 ) 7051 return self._parse_placeholder() 7052 7053 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7054 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7055 self._advance() 7056 return self._prev 7057 return None 7058 7059 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7060 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7061 7062 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7063 return self._parse_primary() or self._parse_var(any_token=True) 7064 7065 def _parse_null(self) -> t.Optional[exp.Expression]: 7066 if self._match_set(self.NULL_TOKENS): 7067 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7068 return self._parse_placeholder() 7069 7070 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7071 if self._match(TokenType.TRUE): 7072 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7073 if self._match(TokenType.FALSE): 7074 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7075 return self._parse_placeholder() 7076 7077 def _parse_star(self) -> t.Optional[exp.Expression]: 7078 if self._match(TokenType.STAR): 7079 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7080 return self._parse_placeholder() 7081 7082 def _parse_parameter(self) -> exp.Parameter: 7083 this = self._parse_identifier() or self._parse_primary_or_var() 7084 return 
    self.expression(exp.Parameter, this=this)

def _parse_placeholder(self) -> t.Optional[exp.Expression]:
    if self._match_set(self.PLACEHOLDER_PARSERS):
        placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
        if placeholder:
            return placeholder
        self._advance(-1)
    return None

def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
    if not self._match_texts(keywords):
        return None
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_expression)

    expression = self._parse_expression()
    return [expression] if expression else None

def _parse_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
) -> t.List[exp.Expression]:
    parse_result = parse_method()
    items = [parse_result] if parse_result is not None else []

    while self._match(sep):
        self._add_comments(parse_result)
        parse_result = parse_method()
        if parse_result is not None:
            items.append(parse_result)

    return items

def _parse_tokens(
    self, parse_method: t.Callable, expressions: t.Dict
) -> t.Optional[exp.Expression]:
    this = parse_method()

    while self._match_set(expressions):
        this = self.expression(
            expressions[self._prev.token_type],
            this=this,
            comments=self._prev_comments,
            expression=parse_method(),
        )

    return this

def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
    return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

def _parse_wrapped_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
) -> t.List[exp.Expression]:
    return self._parse_wrapped(
        lambda: self._parse_csv(parse_method, sep=sep), optional=optional
    )

def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
    wrapped = self._match(TokenType.L_PAREN)
    if not wrapped and not optional:
        self.raise_error("Expecting (")
    parse_result = parse_method()
    if wrapped:
        self._match_r_paren()
    return parse_result

def _parse_expressions(self) -> t.List[exp.Expression]:
    return self._parse_csv(self._parse_expression)

def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
    return self._parse_select() or self._parse_set_operations(
        self._parse_alias(self._parse_assignment(), explicit=True)
        if alias
        else self._parse_assignment()
    )

def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
    return self._parse_query_modifiers(
        self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
    )

def _parse_transaction(self) -> exp.Transaction | exp.Command:
    this = None
    if self._match_texts(self.TRANSACTION_KIND):
        this = self._prev.text

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    while True:
        mode = []
        while self._match(TokenType.VAR):
            mode.append(self._prev.text)

        if mode:
            modes.append(" ".join(mode))
        if not self._match(TokenType.COMMA):
            break

    return self.expression(exp.Transaction, this=this, modes=modes)

def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    chain = None
    savepoint = None
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    self._match_texts(("TRANSACTION", "WORK"))

    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    if self._match(TokenType.AND):
        chain = not self._match_text_seq("NO")
        self._match_text_seq("CHAIN")

    if is_rollback:
        return self.expression(exp.Rollback, savepoint=savepoint)

    return self.expression(exp.Commit, chain=chain)

def _parse_refresh(self) -> exp.Refresh:
    self._match(TokenType.TABLE)
    return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

def _parse_add_column(self) -> t.Optional[exp.Expression]:
    if not self._match_text_seq("ADD"):
        return None

    self._match(TokenType.COLUMN)
    exists_column = self._parse_exists(not_=True)
    expression = self._parse_field_def()

    if expression:
        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

    return expression

def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    drop = self._match(TokenType.DROP) and self._parse_drop()
    if drop and not isinstance(drop, exp.Command):
        drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop

# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )

def _parse_alter_table_add(self) -> t.List[exp.Expression]:
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
        return self._parse_csv(
            lambda: self.expression(
                exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
            )
        )

    self._retreat(index)
    if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
        return self._parse_wrapped_csv(self._parse_field_def, optional=True)

    if self._match_text_seq("ADD", "COLUMNS"):
        schema = self._parse_schema()
        if schema:
            return [schema]
        return []

    return self._parse_wrapped_csv(self._parse_add_column, optional=True)

def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
    if self._match_texts(self.ALTER_ALTER_PARSERS):
        return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

    # Many dialects support the ALTER [COLUMN] syntax, so if there is no
    # keyword after ALTER we default to parsing this statement
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
    if self._match(TokenType.COMMENT):
        return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
    if self._match_text_seq("DROP", "NOT", "NULL"):
        return self.expression(
            exp.AlterColumn,
            this=column,
            drop=True,
            allow_null=True,
        )
    if self._match_text_seq("SET", "NOT", "NULL"):
        return self.expression(
            exp.AlterColumn,
            this=column,
            allow_null=False,
        )

    if self._match_text_seq("SET", "VISIBLE"):
        return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
    if self._match_text_seq("SET", "INVISIBLE"):
        return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

    self._match_text_seq("SET", "DATA")
    self._match_text_seq("TYPE")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_assignment(),
    )

def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
    if self._match_texts(("ALL", "EVEN", "AUTO")):
        return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

    self._match_text_seq("KEY", "DISTKEY")
    return self.expression(exp.AlterDistStyle, this=self._parse_column())

def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
    if compound:
        self._match_text_seq("SORTKEY")

    if self._match(TokenType.L_PAREN, advance=False):
        return self.expression(
            exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
        )

    self._match_texts(("AUTO", "NONE"))
    return self.expression(
        exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
    )

def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
    index = self._index - 1

    partition_exists = self._parse_exists()
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)

def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
    if self._match(TokenType.COLUMN):
        exists = self._parse_exists()
        old_column = self._parse_column()
        to = self._match_text_seq("TO")
        new_column = self._parse_column()

        if old_column is None or to is None or new_column is None:
            return None

        return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

    self._match_text_seq("TO")
    return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

def _parse_alter_table_set(self) -> exp.AlterSet:
    alter_set = self.expression(exp.AlterSet)

    if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
        "TABLE", "PROPERTIES"
    ):
        alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
    elif self._match_text_seq("FILESTREAM_ON", advance=False):
        alter_set.set("expressions", [self._parse_assignment()])
    elif self._match_texts(("LOGGED", "UNLOGGED")):
        alter_set.set("option", exp.var(self._prev.text.upper()))
    elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
        alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
    elif self._match_text_seq("LOCATION"):
        alter_set.set("location", self._parse_field())
    elif self._match_text_seq("ACCESS", "METHOD"):
        alter_set.set("access_method", self._parse_field())
    elif self._match_text_seq("TABLESPACE"):
        alter_set.set("tablespace", self._parse_field())
    elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
        alter_set.set("file_format", [self._parse_field()])
    elif self._match_text_seq("STAGE_FILE_FORMAT"):
        alter_set.set("file_format", self._parse_wrapped_options())
    elif self._match_text_seq("STAGE_COPY_OPTIONS"):
        alter_set.set("copy_options", self._parse_wrapped_options())
    elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
        alter_set.set("tag", self._parse_csv(self._parse_assignment))
    else:
        if self._match_text_seq("SERDE"):
            alter_set.set("serde", self._parse_field())

        alter_set.set("expressions", [self._parse_properties()])

    return alter_set

def _parse_alter(self) -> exp.Alter | exp.Command:
    start = self._prev

    alter_token = self._match_set(self.ALTERABLES) and self._prev
    if not alter_token:
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")
    this = self._parse_table(schema=True)
    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._next:
        self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))
        not_valid = self._match_text_seq("NOT", "VALID")
        options = self._parse_csv(self._parse_property)

        if not self._curr and actions:
            return self.expression(
                exp.Alter,
                this=this,
                kind=alter_token.text.upper(),
                exists=exists,
                actions=actions,
                only=only,
                options=options,
                cluster=cluster,
                not_valid=not_valid,
            )

    return self._parse_as_command(start)

def _parse_analyze(self) -> exp.Analyze | exp.Command:
    start = self._prev
    # https://duckdb.org/docs/sql/statements/analyze
    if not self._curr:
        return self.expression(exp.Analyze)

    options = []
    while self._match_texts(self.ANALYZE_STYLES):
        if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
            options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
        else:
            options.append(self._prev.text.upper())

    this: t.Optional[exp.Expression] = None
    inner_expression: t.Optional[exp.Expression] = None

    kind = self._curr and self._curr.text.upper()

    if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
        this = self._parse_table_parts()
    elif self._match_text_seq("TABLES"):
        if self._match_set((TokenType.FROM, TokenType.IN)):
            kind = f"{kind} {self._prev.text.upper()}"
            this = self._parse_table(schema=True, is_db_reference=True)
    elif self._match_text_seq("DATABASE"):
        this = self._parse_table(schema=True, is_db_reference=True)
    elif self._match_text_seq("CLUSTER"):
        this = self._parse_table()
    # Try matching inner expr keywords before fallback to parse table.
    elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
        kind = None
        inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
    else:
        # Empty kind https://prestodb.io/docs/current/sql/analyze.html
        kind = None
        this = self._parse_table_parts()

    partition = self._try_parse(self._parse_partition)
    if not partition and self._match_texts(self.PARTITION_KEYWORDS):
        return self._parse_as_command(start)

    # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
    if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
        "WITH", "ASYNC", "MODE"
    ):
        mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
    else:
        mode = None

    if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
        inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

    properties = self._parse_properties()
    return self.expression(
        exp.Analyze,
        kind=kind,
        this=this,
        mode=mode,
        partition=partition,
        properties=properties,
        expression=inner_expression,
        options=options,
    )

# https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
    this = None
    kind = self._prev.text.upper()
    option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
    expressions = []

    if not self._match_text_seq("STATISTICS"):
        self.raise_error("Expecting token STATISTICS")

    if self._match_text_seq("NOSCAN"):
        this = "NOSCAN"
    elif self._match(TokenType.FOR):
        if self._match_text_seq("ALL", "COLUMNS"):
            this = "FOR ALL COLUMNS"
        if self._match_texts("COLUMNS"):
            this = "FOR COLUMNS"
            expressions = self._parse_csv(self._parse_column_reference)
    elif self._match_text_seq("SAMPLE"):
        sample = self._parse_number()
        expressions = [
            self.expression(
                exp.AnalyzeSample,
                sample=sample,
                kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
            )
        ]

    return self.expression(
        exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
    )

# https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
    kind = None
    this = None
    expression: t.Optional[exp.Expression] = None
    if self._match_text_seq("REF", "UPDATE"):
        kind = "REF"
        this = "UPDATE"
        if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
            this = "UPDATE SET DANGLING TO NULL"
    elif self._match_text_seq("STRUCTURE"):
        kind = "STRUCTURE"
        if self._match_text_seq("CASCADE", "FAST"):
            this = "CASCADE FAST"
        elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
            ("ONLINE", "OFFLINE")
        ):
            this = f"CASCADE COMPLETE {self._prev.text.upper()}"
            expression = self._parse_into()

    return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
    this = self._prev.text.upper()
    if self._match_text_seq("COLUMNS"):
        return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
    return None

def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
    kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
    if self._match_text_seq("STATISTICS"):
        return self.expression(exp.AnalyzeDelete, kind=kind)
    return None

def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
    if self._match_text_seq("CHAINED", "ROWS"):
        return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
    return None

# https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
    this = self._prev.text.upper()
    expression: t.Optional[exp.Expression] = None
    expressions = []
    update_options = None

    if self._match_text_seq("HISTOGRAM", "ON"):
        expressions = self._parse_csv(self._parse_column_reference)
        with_expressions = []
        while self._match(TokenType.WITH):
            # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
            if self._match_texts(("SYNC", "ASYNC")):
                if self._match_text_seq("MODE", advance=False):
                    with_expressions.append(f"{self._prev.text.upper()} MODE")
                    self._advance()
            else:
                buckets = self._parse_number()
                if self._match_text_seq("BUCKETS"):
                    with_expressions.append(f"{buckets} BUCKETS")
        if with_expressions:
            expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

        if self._match_texts(("MANUAL", "AUTO")) and self._match(
            TokenType.UPDATE, advance=False
        ):
            update_options = self._prev.text.upper()
            self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

    return self.expression(
        exp.AnalyzeHistogram,
        this=this,
        expressions=expressions,
        expression=expression,
        update_options=update_options,
    )

def _parse_merge(self) -> exp.Merge:
    self._match(TokenType.INTO)
    target = self._parse_table()

    if target and self._match(TokenType.ALIAS, advance=False):
        target.set("alias", self._parse_table_alias())

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_assignment()

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        whens=self._parse_when_matched(),
        returning=self._parse_returning(),
    )

def _parse_when_matched(self) -> exp.Whens:
    whens = []

    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_assignment() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            this = self._parse_star()
            if this:
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=exp.var("ROW")
                    if self._match_text_seq("ROW")
                    else self._parse_value(values=False),
                    expression=self._match_text_seq("VALUES") and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )
    return self.expression(exp.Whens, expressions=whens)

def _parse_show(self) -> t.Optional[exp.Expression]:
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    return self._parse_as_command(self._prev)

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    index = self._index

    if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_column()
    assignment_delimiter = self._match_texts(("=", "TO"))

    if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    if isinstance(right, (exp.Column, exp.Identifier)):
        right = exp.var(right.name)

    this = self.expression(exp.EQ, this=left, expression=right)
    return self.expression(exp.SetItem, this=this, kind=kind)

def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    index = self._index
    set_ = self.expression(
        exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
    )

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(
    self, options: OPTIONS_TYPE, raise_unmatched: bool = True
) -> t.Optional[exp.Var]:
    start = self._curr
    if not start:
        return None

    option = start.text.upper()
    continuations = options.get(option)

    index = self._index
    self._advance()
    for keywords in continuations or []:
        if isinstance(keywords, str):
            keywords = (keywords,)

        if self._match_text_seq(*keywords):
            option = f"{option} {' '.join(keywords)}"
            break
    else:
        if continuations or continuations is None:
            if raise_unmatched:
                self.raise_error(f"Unknown option {option}")

            self._retreat(index)
            return None

    return exp.var(option)

def _parse_as_command(self, start: Token) -> exp.Command:
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    size = len(start.text)
    self._warn_unsupported()
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )

def _parse_dict_range(self, this: str) -> exp.DictRange:
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    if has_min:
        min = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max = self._parse_var() or self._parse_primary()
    else:
        max = self._parse_var() or self._parse_primary()
        min = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min, max=max)

def _parse_comprehension(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Comprehension]:
    index = self._index
    expression = self._parse_column()
    if not self._match(TokenType.IN):
        self._retreat(index - 1)
        return None
    iterator = self._parse_column()
    condition = self._parse_assignment() if self._match_text_seq("IF") else None
    return self.expression(
        exp.Comprehension,
        this=this,
        expression=expression,
        iterator=iterator,
        condition=condition,
    )

def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
    if self._match(TokenType.HEREDOC_STRING):
        return self.expression(exp.Heredoc, this=self._prev.text)

    if not self._match_text_seq("$"):
        return None

    tags = ["$"]
    tag_text = None

    if self._is_connected():
        self._advance()
        tags.append(self._prev.text.upper())
    else:
        self.raise_error("No closing $ found")

    if tags[-1] != "$":
        if self._is_connected() and self._match_text_seq("$"):
            tag_text = tags[-1]
            tags.append("$")
        else:
            self.raise_error("No closing $ found")

    heredoc_start = self._curr

    while self._curr:
        if self._match_text_seq(*tags, advance=False):
            this = self._find_sql(heredoc_start, self._prev)
            self._advance(len(tags))
            return self.expression(exp.Heredoc, this=this, tag=tag_text)

        self._advance()

    self.raise_error(f"No closing {''.join(tags)} found")
    return None

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    if not self._curr:
        return None

    index = self._index
    this = []
    while True:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            subparser = parsers[" ".join(this)]
            return subparser

    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None

def _match_set(self, types, advance=True):
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None

def _match_pair(self, token_type_a, token_type_b, advance=True):
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    if (
        self._curr
        and self._curr.token_type != TokenType.STRING
        and self._curr.text.upper() in texts
    ):
        if advance:
            self._advance()
        return True
    return None

def _match_text_seq(self, *texts, advance=True):
    index = self._index
    for text in texts:
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() == text
        ):
            self._advance()
        else:
            self._retreat(index)
            return None

    if not advance:
        self._retreat(index)

    return True

def _replace_lambda(
    self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
) -> t.Optional[exp.Expression]:
    if not node:
        return node

    lambda_types = {e.name: e.args.get("to") or False for e in expressions}

    for column in node.find_all(exp.Column):
        typ = lambda_types.get(column.parts[0].name)
        if typ is not None:
            dot_or_id = column.to_dot() if column.table else column.this

            if typ:
                dot_or_id = self.expression(
                    exp.Cast,
                    this=dot_or_id,
                    to=typ,
                )

            parent = column.parent

            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                if column is node:
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node

def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
    start = self._prev

    # Not to be confused with TRUNCATE(number, decimals) function call
    if self._match(TokenType.L_PAREN):
        self._retreat(self._index - 2)
        return self._parse_function()

    # Clickhouse supports TRUNCATE DATABASE as well
    is_database = self._match(TokenType.DATABASE)

    self._match(TokenType.TABLE)

    exists = self._parse_exists(not_=False)

    expressions = self._parse_csv(
        lambda: self._parse_table(schema=True, is_db_reference=is_database)
    )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match_text_seq("RESTART", "IDENTITY"):
        identity = "RESTART"
    elif self._match_text_seq("CONTINUE", "IDENTITY"):
        identity = "CONTINUE"
    else:
        identity = None

    if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
        option = self._prev.text
    else:
        option = None

    partition = self._parse_partition()

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.TruncateTable,
        expressions=expressions,
        is_database=is_database,
        exists=exists,
        cluster=cluster,
        identity=identity,
        option=option,
        partition=partition,
    )

def _parse_with_operator(self) -> t.Optional[exp.Expression]:
    this = self._parse_ordered(self._parse_opclass)

    if not self._match(TokenType.WITH):
        return this

    op = self._parse_var(any_token=True)

    return self.expression(exp.WithOperator, this=this, op=op)

def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
    self._match(TokenType.EQ)
    self._match(TokenType.L_PAREN)

    opts: t.List[t.Optional[exp.Expression]] = []
    option: exp.Expression | None
    while self._curr and not self._match(TokenType.R_PAREN):
        if self._match_text_seq("FORMAT_NAME", "="):
            # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
            option = self._parse_format_name()
        else:
            option = self._parse_property()

        if option is None:
            self.raise_error("Unable to parse option")
            break

        opts.append(option)

    return opts

def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
    sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

    options = []
    while self._curr and not self._match(TokenType.R_PAREN, advance=False):
        option = self._parse_var(any_token=True)
        prev = self._prev.text.upper()

        # Different dialects might separate options and values by white space, "=" and "AS"
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        param = self.expression(exp.CopyParameter, this=option)

        if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
            TokenType.L_PAREN, advance=False
        ):
            # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
            param.set("expressions", self._parse_wrapped_options())
        elif prev == "FILE_FORMAT":
            # T-SQL's external file format case
            param.set("expression", self._parse_field())
        else:
            param.set("expression", self._parse_unquoted_field())

        options.append(param)
        self._match(sep)

    return options

def _parse_credentials(self) -> t.Optional[exp.Credentials]:
    expr = self.expression(exp.Credentials)

    if self._match_text_seq("STORAGE_INTEGRATION", "="):
        expr.set("storage", self._parse_field())
    if self._match_text_seq("CREDENTIALS"):
        # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
        creds = (
            self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
        )
        expr.set("credentials", creds)
    if self._match_text_seq("ENCRYPTION"):
        expr.set("encryption", self._parse_wrapped_options())
    if self._match_text_seq("IAM_ROLE"):
        expr.set("iam_role", self._parse_field())
    if self._match_text_seq("REGION"):
        expr.set("region", self._parse_field())

    return expr

def _parse_file_location(self) -> t.Optional[exp.Expression]:
    return self._parse_field()

def _parse_copy(self) -> exp.Copy | exp.Command:
    start = self._prev

    self._match(TokenType.INTO)

    this = (
        self._parse_select(nested=True, parse_subquery_alias=False)
        if self._match(TokenType.L_PAREN, advance=False)
        else self._parse_table(schema=True)
    )

    kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

    files = self._parse_csv(self._parse_file_location)
    credentials = self._parse_credentials()

    self._match_text_seq("WITH")

    params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Copy,
        this=this,
        kind=kind,
        credentials=credentials,
        files=files,
        params=params,
    )

def _parse_normalize(self) -> exp.Normalize:
    return self.expression(
        exp.Normalize,
        this=self._parse_bitwise(),
        form=self._match(TokenType.COMMA) and self._parse_var(),
    )

def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
    args = self._parse_csv(lambda: self._parse_lambda())

    this = seq_get(args, 0)
    decimals = seq_get(args, 1)

    return expr_type(
        this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
    )

def _parse_star_ops(self) -> t.Optional[exp.Expression]:
    if self._match_text_seq("COLUMNS", "(", advance=False):
        this = self._parse_function()
        if isinstance(this, exp.Columns):
            this.set("unpack", True)
        return this

    return self.expression(
        exp.Star,
        **{  # type: ignore
            "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
            "replace": self._parse_star_op("REPLACE"),
            "rename": self._parse_star_op("RENAME"),
        },
    )

def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
    privilege_parts = []

    # Keep consuming consecutive keywords until comma (end of this privilege) or ON
    # (end of privilege list) or L_PAREN (start of column list) are met
    while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
        privilege_parts.append(self._curr.text.upper())
        self._advance()

    this = exp.var(" ".join(privilege_parts))
    expressions = (
        self._parse_wrapped_csv(self._parse_column)
        if self._match(TokenType.L_PAREN, advance=False)
        else None
    )

    return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
    kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
    principal = self._parse_id_var()

    if not principal:
        return None

    return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

def _parse_grant(self) -> exp.Grant | exp.Command:
    start = self._prev

    privileges = self._parse_csv(self._parse_grant_privilege)

    self._match(TokenType.ON)
    kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

    # Attempt to parse the securable e.g. MySQL allows names
    # such as "foo.*", "*.*" which are not easily parseable yet
    securable = self._try_parse(self._parse_table_parts)

    if not securable or not self._match_text_seq("TO"):
        return self._parse_as_command(start)

    principals = self._parse_csv(self._parse_grant_principal)

    grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Grant,
        privileges=privileges,
        kind=kind,
        securable=securable,
        principals=principals,
        grant_option=grant_option,
    )

def _parse_overlay(self) -> exp.Overlay:
    return self.expression(
        exp.Overlay,
        **{  # type: ignore
            "this": self._parse_bitwise(),
            "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
            "from": self._match_text_seq("FROM") and self._parse_bitwise(),
            "for": self._match_text_seq("FOR") and self._parse_bitwise(),
        },
    )

def _parse_format_name(self) -> exp.Property:
    # Note: Although not specified in the docs, Snowflake does accept a string/identifier
    # for FILE_FORMAT = <format_name>
    return self.expression(
        exp.Property,
        this=exp.var("FORMAT_NAME"),
        value=self._parse_string() or self._parse_table_parts(),
    )

def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
    args: t.List[exp.Expression] = []

    if self._match(TokenType.DISTINCT):
        args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
        self._match(TokenType.COMMA)

    args.extend(self._parse_csv(self._parse_assignment))

    return self.expression(
        expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
    )

def _identifier_expression(
    self, token: t.Optional[Token] = None, **kwargs: t.Any
) -> exp.Identifier:
    token = token or self._prev
    expression = self.expression(exp.Identifier, this=token.text, **kwargs)
    expression.update_positions(token)
    return expression
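The methods above are internal, but their effects are observable through sqlglot's public entry points. A minimal sketch (the SQL strings and assertions are illustrative, not part of this module):

import sqlglot
from sqlglot import exp

# MERGE statements flow through _parse_merge/_parse_when_matched
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)

# TRUNCATE statements flow through _parse_truncate_table
trunc = sqlglot.parse_one("TRUNCATE TABLE a, b RESTART IDENTITY")
assert isinstance(trunc, exp.TruncateTable)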
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
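For instance, a parser that records up to five errors and logs them instead of raising immediately could be constructed as follows (a sketch; in normal use the parser is obtained through a Dialect rather than instantiated directly):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# WARN makes check_errors() log recorded errors instead of raising
parser = Parser(error_level=ErrorLevel.WARN, error_message_context=50, max_errors=5)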
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
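A short end-to-end sketch of the tokenize-then-parse flow (the dialect name and SQL are illustrative):

from sqlglot.dialects import Dialect

dialect = Dialect.get_or_raise("duckdb")
sql = "SELECT 1; SELECT 2"

# One syntax tree per ;-separated statement
trees = dialect.parser().parse(dialect.tokenize(sql), sql)
assert len(trees) == 2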
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
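In practice this is what powers the public parse_one(..., into=...) helper; a small sketch:

import sqlglot
from sqlglot import exp

# Under the hood this calls Parser.parse_into with exp.Table
table = sqlglot.parse_one("foo.bar", into=exp.Table)
assert isinstance(table, exp.Table) and table.db == "foo"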
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error to the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
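This method is normally called from within parsing code as self.expression(...), but it can be exercised directly; a sketch (standalone use like this is for illustration only):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
commit = parser.expression(exp.Commit, chain=True)  # validated exp.Commit node
assert commit.args["chain"] is True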
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
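A sketch of how the error level changes validation behavior (exp.Not is used only because its "this" argument is mandatory; the exact error text may differ):

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

# IGNORE skips validation entirely, returning the node as-is
node = Parser(error_level=ErrorLevel.IGNORE).validate_expression(exp.Not())

# IMMEDIATE raises on the first missing mandatory argument
try:
    Parser(error_level=ErrorLevel.IMMEDIATE).validate_expression(exp.Not())
except ParseError as e:
    print(e)  # e.g. a "Required keyword: 'this' missing for ..." message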
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    start = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = self._prev.comments

    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    expression = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
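Seen from the public API, this is the code path taken by set operations; a short illustrative sketch:

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
assert isinstance(union, exp.Union)
assert union.args.get("distinct") is False  # UNION ALL => distinct=False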