sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
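
# Example: the build_* helpers above turn flat argument lists into AST nodes.
# build_var_map, for instance, pairs ['a', 1, 'b', 2] into keys ['a', 'b'] and
# values [1, 2] to produce an exp.VarMap (a lone star argument yields
# exp.StarMap instead). Illustrative sketch, assuming sqlglot is installed and
# the active dialect routes MAP(...) through build_var_map (Hive does):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT MAP('a', 1, 'b', 2)", read="hive")
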
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
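
    # Example: FUNCTIONS maps an uppercased function name to a builder that is
    # called with the parsed argument list (two-argument builders also receive
    # the active dialect). Illustrative sketch, assuming sqlglot is installed:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT LOG(2, 8)")
    #
    # This routes through build_logarithm, which consults dialect.LOG_BASE_FIRST
    # to decide whether 2 is the base or the value.
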
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
        TokenType.SINK,
        TokenType.SOURCE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
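
    # Example: the tables above drive precedence climbing, with FACTOR binding
    # tighter than TERM, which in turn binds tighter than the comparison and
    # logical operators. Illustrative sketch, assuming sqlglot is installed:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT a + b * c")
    #
    # Here b * c is grouped first (exp.Mul), then added to a (exp.Add).
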
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
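
    # Example: COLUMN_OPERATORS handles postfix operators applied to a parsed
    # column, e.g. x::INT becomes exp.Cast (or exp.TryCast) and x -> '$.y'
    # becomes exp.JSONExtract, while RANGE_PARSERS handles predicates such as
    # BETWEEN and IN. Illustrative sketch, assuming sqlglot is installed:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT x::INT FROM t WHERE y BETWEEN 1 AND 2")
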
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
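
    # Example: PROPERTY_PARSERS is keyed by the keyword text encountered where
    # a DDL property may appear. Illustrative sketch, assuming sqlglot is
    # installed; the ENGINE clause below is parsed into an exp.EngineProperty:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (x Int8) ENGINE=MergeTree", read="clickhouse")
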
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
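
    # Example: FUNCTION_PARSERS overrides generic function parsing for calls
    # whose arguments are not a plain CSV list, e.g. CAST(x AS INT) or
    # EXTRACT(YEAR FROM d). Illustrative sketch, assuming sqlglot is installed:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT CAST(x AS INT), EXTRACT(YEAR FROM d) FROM t")
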
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
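
    # Example: dialects tune parsing behavior by overriding these class-level
    # flags on their Parser subclass. A hypothetical dialect whose LOG(x)
    # means LN(x) and which allows string aliases might use (illustrative
    # sketch, not an actual sqlglot dialect):
    #
    #     class MyDialectParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True
    #         STRING_ALIASES = True
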
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
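
    # Example usage of parse, assuming sqlglot is installed (Parser and
    # Tokenizer are re-exported at the package root):
    #
    #     >>> from sqlglot import Parser, Tokenizer
    #     >>> tokens = Tokenizer().tokenize("SELECT a FROM b; SELECT 1")
    #     >>> Parser().parse(tokens)  # one syntax tree per statement
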
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
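
    # Example usage of parse_into, assuming sqlglot is installed: the token
    # list is parsed with the registered EXPRESSION_PARSERS entry for the
    # requested type, here exp.Condition:
    #
    #     >>> from sqlglot import Parser, Tokenizer, exp
    #     >>> tokens = Tokenizer().tokenize("x > 1 AND y < 2")
    #     >>> Parser().parse_into(exp.Condition, tokens)
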
1744 1745 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1746 start = self._prev 1747 temporary = self._match(TokenType.TEMPORARY) 1748 materialized = self._match_text_seq("MATERIALIZED") 1749 1750 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1751 if not kind: 1752 return self._parse_as_command(start) 1753 1754 concurrently = self._match_text_seq("CONCURRENTLY") 1755 if_exists = exists or self._parse_exists() 1756 1757 if kind == "COLUMN": 1758 this = self._parse_column() 1759 else: 1760 this = self._parse_table_parts( 1761 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1762 ) 1763 1764 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1765 1766 if self._match(TokenType.L_PAREN, advance=False): 1767 expressions = self._parse_wrapped_csv(self._parse_types) 1768 else: 1769 expressions = None 1770 1771 return self.expression( 1772 exp.Drop, 1773 exists=if_exists, 1774 this=this, 1775 expressions=expressions, 1776 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1777 temporary=temporary, 1778 materialized=materialized, 1779 cascade=self._match_text_seq("CASCADE"), 1780 constraints=self._match_text_seq("CONSTRAINTS"), 1781 purge=self._match_text_seq("PURGE"), 1782 cluster=cluster, 1783 concurrently=concurrently, 1784 ) 1785 1786 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1787 return ( 1788 self._match_text_seq("IF") 1789 and (not not_ or self._match(TokenType.NOT)) 1790 and self._match(TokenType.EXISTS) 1791 ) 1792 1793 def _parse_create(self) -> exp.Create | exp.Command: 1794 # Note: this can't be None because we've matched a statement parser 1795 start = self._prev 1796 1797 replace = ( 1798 start.token_type == TokenType.REPLACE 1799 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1800 or self._match_pair(TokenType.OR, TokenType.ALTER) 1801 ) 1802 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1803 1804 unique = self._match(TokenType.UNIQUE) 1805 1806 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1807 clustered = True 1808 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1809 "COLUMNSTORE" 1810 ): 1811 clustered = False 1812 else: 1813 clustered = None 1814 1815 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1816 self._advance() 1817 1818 properties = None 1819 create_token = self._match_set(self.CREATABLES) and self._prev 1820 1821 if not create_token: 1822 # exp.Properties.Location.POST_CREATE 1823 properties = self._parse_properties() 1824 create_token = self._match_set(self.CREATABLES) and self._prev 1825 1826 if not properties or not create_token: 1827 return self._parse_as_command(start) 1828 1829 concurrently = self._match_text_seq("CONCURRENTLY") 1830 exists = self._parse_exists(not_=True) 1831 this = None 1832 expression: t.Optional[exp.Expression] = None 1833 indexes = None 1834 no_schema_binding = None 1835 begin = None 1836 end = None 1837 clone = None 1838 1839 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1840 nonlocal properties 1841 if properties and temp_props: 1842 properties.expressions.extend(temp_props.expressions) 1843 elif temp_props: 1844 properties = temp_props 1845 1846 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1847 this = self._parse_user_defined_function(kind=create_token.token_type) 1848 1849 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1850 
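# (illustrative: for CREATE FUNCTION f(x INT) RETURNS INT LANGUAGE SQL ..., the RETURNS
# and LANGUAGE clauses are collected here as POST_SCHEMA properties)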
extend_props(self._parse_properties()) 1851 1852 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1853 extend_props(self._parse_properties()) 1854 1855 if not expression: 1856 if self._match(TokenType.COMMAND): 1857 expression = self._parse_as_command(self._prev) 1858 else: 1859 begin = self._match(TokenType.BEGIN) 1860 return_ = self._match_text_seq("RETURN") 1861 1862 if self._match(TokenType.STRING, advance=False): 1863 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1864 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1865 expression = self._parse_string() 1866 extend_props(self._parse_properties()) 1867 else: 1868 expression = self._parse_user_defined_function_expression() 1869 1870 end = self._match_text_seq("END") 1871 1872 if return_: 1873 expression = self.expression(exp.Return, this=expression) 1874 elif create_token.token_type == TokenType.INDEX: 1875 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1876 if not self._match(TokenType.ON): 1877 index = self._parse_id_var() 1878 anonymous = False 1879 else: 1880 index = None 1881 anonymous = True 1882 1883 this = self._parse_index(index=index, anonymous=anonymous) 1884 elif create_token.token_type in self.DB_CREATABLES: 1885 table_parts = self._parse_table_parts( 1886 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1887 ) 1888 1889 # exp.Properties.Location.POST_NAME 1890 self._match(TokenType.COMMA) 1891 extend_props(self._parse_properties(before=True)) 1892 1893 this = self._parse_schema(this=table_parts) 1894 1895 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1896 extend_props(self._parse_properties()) 1897 1898 self._match(TokenType.ALIAS) 1899 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1900 # exp.Properties.Location.POST_ALIAS 1901 extend_props(self._parse_properties()) 1902 1903 if create_token.token_type == TokenType.SEQUENCE: 1904 expression = self._parse_types() 1905 extend_props(self._parse_properties()) 1906 else: 1907 expression = self._parse_ddl_select() 1908 1909 if create_token.token_type == TokenType.TABLE: 1910 # exp.Properties.Location.POST_EXPRESSION 1911 extend_props(self._parse_properties()) 1912 1913 indexes = [] 1914 while True: 1915 index = self._parse_index() 1916 1917 # exp.Properties.Location.POST_INDEX 1918 extend_props(self._parse_properties()) 1919 if not index: 1920 break 1921 else: 1922 self._match(TokenType.COMMA) 1923 indexes.append(index) 1924 elif create_token.token_type == TokenType.VIEW: 1925 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1926 no_schema_binding = True 1927 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1928 extend_props(self._parse_properties()) 1929 1930 shallow = self._match_text_seq("SHALLOW") 1931 1932 if self._match_texts(self.CLONE_KEYWORDS): 1933 copy = self._prev.text.lower() == "copy" 1934 clone = self.expression( 1935 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1936 ) 1937 1938 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1939 return self._parse_as_command(start) 1940 1941 create_kind_text = create_token.text.upper() 1942 return self.expression( 1943 exp.Create, 1944 this=this, 1945 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1946 replace=replace, 1947 refresh=refresh, 1948 unique=unique, 1949 expression=expression,
1950 exists=exists, 1951 properties=properties, 1952 indexes=indexes, 1953 no_schema_binding=no_schema_binding, 1954 begin=begin, 1955 end=end, 1956 clone=clone, 1957 concurrently=concurrently, 1958 clustered=clustered, 1959 ) 1960 1961 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1962 seq = exp.SequenceProperties() 1963 1964 options = [] 1965 index = self._index 1966 1967 while self._curr: 1968 self._match(TokenType.COMMA) 1969 if self._match_text_seq("INCREMENT"): 1970 self._match_text_seq("BY") 1971 self._match_text_seq("=") 1972 seq.set("increment", self._parse_term()) 1973 elif self._match_text_seq("MINVALUE"): 1974 seq.set("minvalue", self._parse_term()) 1975 elif self._match_text_seq("MAXVALUE"): 1976 seq.set("maxvalue", self._parse_term()) 1977 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1978 self._match_text_seq("=") 1979 seq.set("start", self._parse_term()) 1980 elif self._match_text_seq("CACHE"): 1981 # T-SQL allows empty CACHE which is initialized dynamically 1982 seq.set("cache", self._parse_number() or True) 1983 elif self._match_text_seq("OWNED", "BY"): 1984 # "OWNED BY NONE" is the default 1985 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1986 else: 1987 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1988 if opt: 1989 options.append(opt) 1990 else: 1991 break 1992 1993 seq.set("options", options if options else None) 1994 return None if self._index == index else seq 1995 1996 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1997 # only used for teradata currently 1998 self._match(TokenType.COMMA) 1999 2000 kwargs = { 2001 "no": self._match_text_seq("NO"), 2002 "dual": self._match_text_seq("DUAL"), 2003 "before": self._match_text_seq("BEFORE"), 2004 "default": self._match_text_seq("DEFAULT"), 2005 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2006 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2007 "after": self._match_text_seq("AFTER"), 2008 "minimum": self._match_texts(("MIN", "MINIMUM")), 2009 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2010 } 2011 2012 if self._match_texts(self.PROPERTY_PARSERS): 2013 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2014 try: 2015 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2016 except TypeError: 2017 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2018 2019 return None 2020 2021 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2022 return self._parse_wrapped_csv(self._parse_property) 2023 2024 def _parse_property(self) -> t.Optional[exp.Expression]: 2025 if self._match_texts(self.PROPERTY_PARSERS): 2026 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2027 2028 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2029 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2030 2031 if self._match_text_seq("COMPOUND", "SORTKEY"): 2032 return self._parse_sortkey(compound=True) 2033 2034 if self._match_text_seq("SQL", "SECURITY"): 2035 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2036 2037 index = self._index 2038 key = self._parse_column() 2039 2040 if not self._match(TokenType.EQ): 2041 self._retreat(index) 2042 return self._parse_sequence_properties() 2043 2044 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2045 if isinstance(key, exp.Column): 2046 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2047 2048 value = self._parse_bitwise() or self._parse_var(any_token=True) 2049 2050 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2051 if isinstance(value, exp.Column): 2052 value = exp.var(value.name) 2053 2054 return self.expression(exp.Property, this=key, value=value) 2055 2056 def _parse_stored(self) -> exp.FileFormatProperty: 2057 self._match(TokenType.ALIAS) 2058 2059 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2060 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2061 2062 return self.expression( 2063 exp.FileFormatProperty, 2064 this=( 2065 self.expression( 2066 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2067 ) 2068 if input_format or output_format 2069 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2070 ), 2071 ) 2072 2073 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2074 field = self._parse_field() 2075 if isinstance(field, exp.Identifier) and not field.quoted: 2076 field = exp.var(field) 2077 2078 return field 2079 2080 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2081 self._match(TokenType.EQ) 2082 self._match(TokenType.ALIAS) 2083 2084 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2085 2086 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2087 properties = [] 2088 while True: 2089 if before: 2090 prop = self._parse_property_before() 2091 else: 2092 prop = self._parse_property() 2093 if not prop: 2094 break 2095 for p in ensure_list(prop): 2096 properties.append(p) 2097 2098 if properties: 2099 return self.expression(exp.Properties, expressions=properties) 2100 2101 return None 2102 2103 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2104 return self.expression( 2105 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2106 ) 2107 2108 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2109 if self._match_texts(("DEFINER", "INVOKER")): 2110 security_specifier = self._prev.text.upper() 2111 return self.expression(exp.SecurityProperty, this=security_specifier) 2112 return None 2113 2114 def _parse_settings_property(self) -> exp.SettingsProperty: 2115 return self.expression( 2116 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2117 ) 2118 2119 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2120 if self._index >= 2: 2121 pre_volatile_token = self._tokens[self._index - 2] 2122 else: 2123 pre_volatile_token = None 2124 2125 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2126 return exp.VolatileProperty() 2127 2128 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2129 2130 def _parse_retention_period(self) -> exp.Var: 2131 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2132 number = self._parse_number() 2133 number_str = f"{number} " if number else "" 2134 unit = self._parse_var(any_token=True) 2135 return exp.var(f"{number_str}{unit}") 2136 2137 def _parse_system_versioning_property( 2138 self, with_: bool = False 2139 ) -> exp.WithSystemVersioningProperty: 2140 self._match(TokenType.EQ) 2141 prop = self.expression( 2142 exp.WithSystemVersioningProperty, 2143 **{ # type: ignore 2144 "on": 
True, 2145 "with": with_, 2146 }, 2147 ) 2148 2149 if self._match_text_seq("OFF"): 2150 prop.set("on", False) 2151 return prop 2152 2153 self._match(TokenType.ON) 2154 if self._match(TokenType.L_PAREN): 2155 while self._curr and not self._match(TokenType.R_PAREN): 2156 if self._match_text_seq("HISTORY_TABLE", "="): 2157 prop.set("this", self._parse_table_parts()) 2158 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2159 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2160 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2161 prop.set("retention_period", self._parse_retention_period()) 2162 2163 self._match(TokenType.COMMA) 2164 2165 return prop 2166 2167 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2168 self._match(TokenType.EQ) 2169 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2170 prop = self.expression(exp.DataDeletionProperty, on=on) 2171 2172 if self._match(TokenType.L_PAREN): 2173 while self._curr and not self._match(TokenType.R_PAREN): 2174 if self._match_text_seq("FILTER_COLUMN", "="): 2175 prop.set("filter_column", self._parse_column()) 2176 elif self._match_text_seq("RETENTION_PERIOD", "="): 2177 prop.set("retention_period", self._parse_retention_period()) 2178 2179 self._match(TokenType.COMMA) 2180 2181 return prop 2182 2183 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2184 kind = "HASH" 2185 expressions: t.Optional[t.List[exp.Expression]] = None 2186 if self._match_text_seq("BY", "HASH"): 2187 expressions = self._parse_wrapped_csv(self._parse_id_var) 2188 elif self._match_text_seq("BY", "RANDOM"): 2189 kind = "RANDOM" 2190 2191 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2192 buckets: t.Optional[exp.Expression] = None 2193 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2194 buckets = self._parse_number() 2195 2196 return self.expression( 2197 exp.DistributedByProperty, 2198 expressions=expressions, 2199 kind=kind, 2200 buckets=buckets, 2201 order=self._parse_order(), 2202 ) 2203 2204 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2205 self._match_text_seq("KEY") 2206 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2207 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2208 2209 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2210 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2211 prop = self._parse_system_versioning_property(with_=True) 2212 self._match_r_paren() 2213 return prop 2214 2215 if self._match(TokenType.L_PAREN, advance=False): 2216 return self._parse_wrapped_properties() 2217 2218 if self._match_text_seq("JOURNAL"): 2219 return self._parse_withjournaltable() 2220 2221 if self._match_texts(self.VIEW_ATTRIBUTES): 2222 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2223 2224 if self._match_text_seq("DATA"): 2225 return self._parse_withdata(no=False) 2226 elif self._match_text_seq("NO", "DATA"): 2227 return self._parse_withdata(no=True) 2228 2229 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2230 return self._parse_serde_properties(with_=True) 2231 2232 if self._match(TokenType.SCHEMA): 2233 return self.expression( 2234 exp.WithSchemaBindingProperty, 2235 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2236 ) 2237 2238 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2239 return self.expression( 2240 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2241 ) 2242 2243 if not self._next: 2244 return None 2245 2246 return self._parse_withisolatedloading() 2247 2248 def _parse_procedure_option(self) -> exp.Expression | None: 2249 if self._match_text_seq("EXECUTE", "AS"): 2250 return self.expression( 2251 exp.ExecuteAsProperty, 2252 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2253 or self._parse_string(), 2254 ) 2255 2256 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2257 2258 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2259 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2260 self._match(TokenType.EQ) 2261 2262 user = self._parse_id_var() 2263 self._match(TokenType.PARAMETER) 2264 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2265 2266 if not user or not host: 2267 return None 2268 2269 return exp.DefinerProperty(this=f"{user}@{host}") 2270 2271 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2272 self._match(TokenType.TABLE) 2273 self._match(TokenType.EQ) 2274 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2275 2276 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2277 return self.expression(exp.LogProperty, no=no) 2278 2279 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2280 return self.expression(exp.JournalProperty, **kwargs) 2281 2282 def _parse_checksum(self) -> exp.ChecksumProperty: 2283 self._match(TokenType.EQ) 2284 2285 on = None 2286 if self._match(TokenType.ON): 2287 on = True 2288 elif self._match_text_seq("OFF"): 2289 on = False 2290 2291 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2292 2293 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2294 return self.expression( 2295 exp.Cluster, 2296 expressions=( 2297 self._parse_wrapped_csv(self._parse_ordered) 2298 if wrapped 2299 else self._parse_csv(self._parse_ordered) 2300 ), 2301 ) 2302 2303 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2304 self._match_text_seq("BY") 2305 2306 self._match_l_paren() 2307 expressions = self._parse_csv(self._parse_column) 2308 self._match_r_paren() 2309 2310 if self._match_text_seq("SORTED", "BY"): 2311 self._match_l_paren() 2312 sorted_by = self._parse_csv(self._parse_ordered) 2313 self._match_r_paren() 2314 else: 2315 sorted_by = None 2316 2317 self._match(TokenType.INTO) 2318 buckets = self._parse_number() 2319 self._match_text_seq("BUCKETS") 2320 2321 return self.expression( 2322 exp.ClusteredByProperty, 2323 expressions=expressions, 2324 sorted_by=sorted_by, 2325 buckets=buckets, 2326 ) 2327 2328 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2329 if not self._match_text_seq("GRANTS"): 2330 self._retreat(self._index - 1) 2331 return None 2332 2333 return self.expression(exp.CopyGrantsProperty) 2334 2335 def _parse_freespace(self) -> exp.FreespaceProperty: 2336 self._match(TokenType.EQ) 2337 return self.expression( 2338 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2339 ) 2340 2341 def _parse_mergeblockratio( 2342 self, no: bool = False, default: bool = False 2343 ) -> exp.MergeBlockRatioProperty: 2344 if self._match(TokenType.EQ): 2345 return self.expression( 2346 exp.MergeBlockRatioProperty, 2347 this=self._parse_number(), 2348 percent=self._match(TokenType.PERCENT), 2349 ) 2350 2351 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2352 2353 def _parse_datablocksize( 2354 self, 2355 default: t.Optional[bool] = None, 2356 minimum: t.Optional[bool] = None, 2357 maximum: t.Optional[bool] = None, 2358 ) -> exp.DataBlocksizeProperty: 2359 self._match(TokenType.EQ) 2360 size = self._parse_number() 2361 2362 units = None 2363 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2364 units = self._prev.text 2365 2366 return self.expression( 2367 exp.DataBlocksizeProperty, 2368 size=size, 2369 units=units, 2370 default=default, 2371 minimum=minimum, 2372 maximum=maximum, 2373 ) 2374 2375 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2376 self._match(TokenType.EQ) 2377 always = self._match_text_seq("ALWAYS") 2378 manual = self._match_text_seq("MANUAL") 2379 never = self._match_text_seq("NEVER") 2380 default = self._match_text_seq("DEFAULT") 2381 2382 autotemp = None 2383 if self._match_text_seq("AUTOTEMP"): 2384 autotemp = self._parse_schema() 2385 2386 return self.expression( 2387 exp.BlockCompressionProperty, 2388 always=always, 2389 manual=manual, 2390 never=never, 2391 default=default, 2392 autotemp=autotemp, 2393 ) 2394 2395 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2396 index = self._index 2397 no = self._match_text_seq("NO") 2398 concurrent = self._match_text_seq("CONCURRENT") 2399 2400 if not self._match_text_seq("ISOLATED", "LOADING"): 2401 self._retreat(index) 2402 return None 2403 2404 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2405 return self.expression( 2406 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2407 ) 2408 2409 def _parse_locking(self) -> exp.LockingProperty: 2410 if self._match(TokenType.TABLE): 2411 kind = "TABLE" 2412 elif self._match(TokenType.VIEW): 2413 kind = "VIEW" 2414 elif self._match(TokenType.ROW): 2415 kind = "ROW" 2416 elif self._match_text_seq("DATABASE"): 2417 kind = "DATABASE" 2418 else: 2419 kind = None 2420 2421 if kind in ("DATABASE", "TABLE", "VIEW"): 2422 this = self._parse_table_parts() 2423 else: 2424 this = None 2425 2426 if self._match(TokenType.FOR): 2427 for_or_in = "FOR" 2428 elif self._match(TokenType.IN): 2429 for_or_in = "IN" 2430 else: 2431 for_or_in = None 2432 2433 if self._match_text_seq("ACCESS"): 2434 lock_type = "ACCESS" 2435 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2436 lock_type = "EXCLUSIVE" 2437 elif self._match_text_seq("SHARE"): 2438 lock_type = "SHARE" 2439 elif self._match_text_seq("READ"): 2440 lock_type = "READ" 2441 elif self._match_text_seq("WRITE"): 2442 lock_type = "WRITE" 2443 elif self._match_text_seq("CHECKSUM"): 2444 lock_type = "CHECKSUM" 2445 else: 2446 lock_type = None 2447 2448 override = self._match_text_seq("OVERRIDE") 2449 2450 return self.expression( 2451 exp.LockingProperty, 2452 this=this, 2453 kind=kind, 2454 for_or_in=for_or_in, 2455 lock_type=lock_type, 2456 override=override, 2457 ) 2458 2459 def _parse_partition_by(self) -> t.List[exp.Expression]: 2460 if self._match(TokenType.PARTITION_BY): 2461 return self._parse_csv(self._parse_assignment) 2462 return [] 2463 2464 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2465 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2466 if self._match_text_seq("MINVALUE"): 2467 return exp.var("MINVALUE") 2468 if self._match_text_seq("MAXVALUE"): 2469 return exp.var("MAXVALUE") 2470 return self._parse_bitwise() 2471 2472 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2473 expression = None 
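# Bound spec forms handled below (Postgres syntax), e.g.:
#   IN ('a', 'b')                 -> this (a list)
#   FROM (MINVALUE) TO (10)       -> from_expressions / to_expressions
#   WITH (MODULUS 4, REMAINDER 0) -> this / expression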
2474 from_expressions = None 2475 to_expressions = None 2476 2477 if self._match(TokenType.IN): 2478 this = self._parse_wrapped_csv(self._parse_bitwise) 2479 elif self._match(TokenType.FROM): 2480 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2481 self._match_text_seq("TO") 2482 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2483 elif self._match_text_seq("WITH", "(", "MODULUS"): 2484 this = self._parse_number() 2485 self._match_text_seq(",", "REMAINDER") 2486 expression = self._parse_number() 2487 self._match_r_paren() 2488 else: 2489 self.raise_error("Failed to parse partition bound spec.") 2490 2491 return self.expression( 2492 exp.PartitionBoundSpec, 2493 this=this, 2494 expression=expression, 2495 from_expressions=from_expressions, 2496 to_expressions=to_expressions, 2497 ) 2498 2499 # https://www.postgresql.org/docs/current/sql-createtable.html 2500 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2501 if not self._match_text_seq("OF"): 2502 self._retreat(self._index - 1) 2503 return None 2504 2505 this = self._parse_table(schema=True) 2506 2507 if self._match(TokenType.DEFAULT): 2508 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2509 elif self._match_text_seq("FOR", "VALUES"): 2510 expression = self._parse_partition_bound_spec() 2511 else: 2512 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2513 2514 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2515 2516 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2517 self._match(TokenType.EQ) 2518 return self.expression( 2519 exp.PartitionedByProperty, 2520 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2521 ) 2522 2523 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2524 if self._match_text_seq("AND", "STATISTICS"): 2525 statistics = True 2526 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2527 statistics = False 2528 else: 2529 statistics = None 2530 2531 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2532 2533 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2534 if self._match_text_seq("SQL"): 2535 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2536 return None 2537 2538 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2539 if self._match_text_seq("SQL", "DATA"): 2540 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2541 return None 2542 2543 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2544 if self._match_text_seq("PRIMARY", "INDEX"): 2545 return exp.NoPrimaryIndexProperty() 2546 if self._match_text_seq("SQL"): 2547 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2548 return None 2549 2550 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2551 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2552 return exp.OnCommitProperty() 2553 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2554 return exp.OnCommitProperty(delete=True) 2555 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2556 2557 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2558 if self._match_text_seq("SQL", "DATA"): 2559 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2560 return None 2561 2562 def _parse_distkey(self) -> exp.DistKeyProperty: 2563 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2564 2565 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2566 table = self._parse_table(schema=True) 2567 2568 options = [] 2569 while self._match_texts(("INCLUDING", "EXCLUDING")): 2570 this = self._prev.text.upper() 2571 2572 id_var = self._parse_id_var() 2573 if not id_var: 2574 return None 2575 2576 options.append( 2577 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2578 ) 2579 2580 return self.expression(exp.LikeProperty, this=table, expressions=options) 2581 2582 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2583 return self.expression( 2584 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2585 ) 2586 2587 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2588 self._match(TokenType.EQ) 2589 return self.expression( 2590 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2591 ) 2592 2593 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2594 self._match_text_seq("WITH", "CONNECTION") 2595 return self.expression( 2596 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2597 ) 2598 2599 def _parse_returns(self) -> exp.ReturnsProperty: 2600 value: t.Optional[exp.Expression] 2601 null = None 2602 is_table = self._match(TokenType.TABLE) 2603 2604 if is_table: 2605 if self._match(TokenType.LT): 2606 value = self.expression( 2607 exp.Schema, 2608 this="TABLE", 2609 expressions=self._parse_csv(self._parse_struct_types), 2610 ) 2611 if not self._match(TokenType.GT): 2612 self.raise_error("Expecting >") 2613 else: 2614 value = self._parse_schema(exp.var("TABLE")) 2615 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2616 null = True 2617 value = None 2618 else: 2619 value = self._parse_types() 2620 2621 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2622 2623 def _parse_describe(self) -> exp.Describe: 2624 kind = self._match_set(self.CREATABLES) and self._prev.text 2625 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2626 if self._match(TokenType.DOT): 2627 style = None 2628 self._retreat(self._index - 2) 2629 2630 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2631 2632 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2633 this = self._parse_statement() 2634 else: 2635 this = self._parse_table(schema=True) 2636 2637 properties = self._parse_properties() 2638 expressions = properties.expressions if properties else None 2639 partition = self._parse_partition() 2640 return self.expression( 2641 exp.Describe, 2642 this=this, 2643 style=style, 2644 kind=kind, 2645 expressions=expressions, 2646 partition=partition, 2647 format=format, 2648 ) 2649 2650 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2651 kind = self._prev.text.upper() 2652 expressions = [] 2653 2654 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2655 if self._match(TokenType.WHEN): 2656 expression = self._parse_disjunction() 2657 self._match(TokenType.THEN) 2658 else: 2659 expression = None 2660 2661 else_ = self._match(TokenType.ELSE) 2662 2663 if not self._match(TokenType.INTO): 2664 return None 2665 2666 return self.expression( 2667 exp.ConditionalInsert, 2668 this=self.expression( 2669 exp.Insert, 2670 this=self._parse_table(schema=True), 2671 
expression=self._parse_derived_table_values(), 2672 ), 2673 expression=expression, 2674 else_=else_, 2675 ) 2676 2677 expression = parse_conditional_insert() 2678 while expression is not None: 2679 expressions.append(expression) 2680 expression = parse_conditional_insert() 2681 2682 return self.expression( 2683 exp.MultitableInserts, 2684 kind=kind, 2685 comments=comments, 2686 expressions=expressions, 2687 source=self._parse_table(), 2688 ) 2689 2690 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2691 comments = [] 2692 hint = self._parse_hint() 2693 overwrite = self._match(TokenType.OVERWRITE) 2694 ignore = self._match(TokenType.IGNORE) 2695 local = self._match_text_seq("LOCAL") 2696 alternative = None 2697 is_function = None 2698 2699 if self._match_text_seq("DIRECTORY"): 2700 this: t.Optional[exp.Expression] = self.expression( 2701 exp.Directory, 2702 this=self._parse_var_or_string(), 2703 local=local, 2704 row_format=self._parse_row_format(match_row=True), 2705 ) 2706 else: 2707 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2708 comments += ensure_list(self._prev_comments) 2709 return self._parse_multitable_inserts(comments) 2710 2711 if self._match(TokenType.OR): 2712 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2713 2714 self._match(TokenType.INTO) 2715 comments += ensure_list(self._prev_comments) 2716 self._match(TokenType.TABLE) 2717 is_function = self._match(TokenType.FUNCTION) 2718 2719 this = ( 2720 self._parse_table(schema=True, parse_partition=True) 2721 if not is_function 2722 else self._parse_function() 2723 ) 2724 2725 returning = self._parse_returning() 2726 2727 return self.expression( 2728 exp.Insert, 2729 comments=comments, 2730 hint=hint, 2731 is_function=is_function, 2732 this=this, 2733 stored=self._match_text_seq("STORED") and self._parse_stored(), 2734 by_name=self._match_text_seq("BY", "NAME"), 2735 exists=self._parse_exists(), 2736 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2737 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2738 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2739 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2740 conflict=self._parse_on_conflict(), 2741 returning=returning or self._parse_returning(), 2742 overwrite=overwrite, 2743 alternative=alternative, 2744 ignore=ignore, 2745 source=self._match(TokenType.TABLE) and self._parse_table(), 2746 ) 2747 2748 def _parse_kill(self) -> exp.Kill: 2749 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2750 2751 return self.expression( 2752 exp.Kill, 2753 this=self._parse_primary(), 2754 kind=kind, 2755 ) 2756 2757 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2758 conflict = self._match_text_seq("ON", "CONFLICT") 2759 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2760 2761 if not conflict and not duplicate: 2762 return None 2763 2764 conflict_keys = None 2765 constraint = None 2766 2767 if conflict: 2768 if self._match_text_seq("ON", "CONSTRAINT"): 2769 constraint = self._parse_id_var() 2770 elif self._match(TokenType.L_PAREN): 2771 conflict_keys = self._parse_csv(self._parse_id_var) 2772 self._match_r_paren() 2773 2774 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2775 if self._prev.token_type == TokenType.UPDATE: 2776 self._match(TokenType.SET) 2777 expressions = self._parse_csv(self._parse_equality) 2778 else: 2779 
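# non-UPDATE conflict actions (e.g. ON CONFLICT DO NOTHING) carry no SET assignments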
expressions = None 2780 2781 return self.expression( 2782 exp.OnConflict, 2783 duplicate=duplicate, 2784 expressions=expressions, 2785 action=action, 2786 conflict_keys=conflict_keys, 2787 constraint=constraint, 2788 ) 2789 2790 def _parse_returning(self) -> t.Optional[exp.Returning]: 2791 if not self._match(TokenType.RETURNING): 2792 return None 2793 return self.expression( 2794 exp.Returning, 2795 expressions=self._parse_csv(self._parse_expression), 2796 into=self._match(TokenType.INTO) and self._parse_table_part(), 2797 ) 2798 2799 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2800 if not self._match(TokenType.FORMAT): 2801 return None 2802 return self._parse_row_format() 2803 2804 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2805 index = self._index 2806 with_ = with_ or self._match_text_seq("WITH") 2807 2808 if not self._match(TokenType.SERDE_PROPERTIES): 2809 self._retreat(index) 2810 return None 2811 return self.expression( 2812 exp.SerdeProperties, 2813 **{ # type: ignore 2814 "expressions": self._parse_wrapped_properties(), 2815 "with": with_, 2816 }, 2817 ) 2818 2819 def _parse_row_format( 2820 self, match_row: bool = False 2821 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2822 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2823 return None 2824 2825 if self._match_text_seq("SERDE"): 2826 this = self._parse_string() 2827 2828 serde_properties = self._parse_serde_properties() 2829 2830 return self.expression( 2831 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2832 ) 2833 2834 self._match_text_seq("DELIMITED") 2835 2836 kwargs = {} 2837 2838 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2839 kwargs["fields"] = self._parse_string() 2840 if self._match_text_seq("ESCAPED", "BY"): 2841 kwargs["escaped"] = self._parse_string() 2842 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2843 kwargs["collection_items"] = self._parse_string() 2844 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2845 kwargs["map_keys"] = self._parse_string() 2846 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2847 kwargs["lines"] = self._parse_string() 2848 if self._match_text_seq("NULL", "DEFINED", "AS"): 2849 kwargs["null"] = self._parse_string() 2850 2851 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2852 2853 def _parse_load(self) -> exp.LoadData | exp.Command: 2854 if self._match_text_seq("DATA"): 2855 local = self._match_text_seq("LOCAL") 2856 self._match_text_seq("INPATH") 2857 inpath = self._parse_string() 2858 overwrite = self._match(TokenType.OVERWRITE) 2859 self._match_pair(TokenType.INTO, TokenType.TABLE) 2860 2861 return self.expression( 2862 exp.LoadData, 2863 this=self._parse_table(schema=True), 2864 local=local, 2865 overwrite=overwrite, 2866 inpath=inpath, 2867 partition=self._parse_partition(), 2868 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2869 serde=self._match_text_seq("SERDE") and self._parse_string(), 2870 ) 2871 return self._parse_as_command(self._prev) 2872 2873 def _parse_delete(self) -> exp.Delete: 2874 # This handles MySQL's "Multiple-Table Syntax" 2875 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2876 tables = None 2877 if not self._match(TokenType.FROM, advance=False): 2878 tables = self._parse_csv(self._parse_table) or None 2879 2880 returning = self._parse_returning() 2881 2882 
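# Illustrative sketch, not from the source: both delete shapes end up here, e.g.
#
#   import sqlglot
#   sqlglot.parse_one("DELETE FROM t WHERE id = 1")  # `tables` stays None
#   sqlglot.parse_one("DELETE t1, t2 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql")  # `tables` holds t1, t2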
return self.expression( 2883 exp.Delete, 2884 tables=tables, 2885 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2886 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2887 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2888 where=self._parse_where(), 2889 returning=returning or self._parse_returning(), 2890 limit=self._parse_limit(), 2891 ) 2892 2893 def _parse_update(self) -> exp.Update: 2894 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2895 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2896 returning = self._parse_returning() 2897 return self.expression( 2898 exp.Update, 2899 **{ # type: ignore 2900 "this": this, 2901 "expressions": expressions, 2902 "from": self._parse_from(joins=True), 2903 "where": self._parse_where(), 2904 "returning": returning or self._parse_returning(), 2905 "order": self._parse_order(), 2906 "limit": self._parse_limit(), 2907 }, 2908 ) 2909 2910 def _parse_uncache(self) -> exp.Uncache: 2911 if not self._match(TokenType.TABLE): 2912 self.raise_error("Expecting TABLE after UNCACHE") 2913 2914 return self.expression( 2915 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2916 ) 2917 2918 def _parse_cache(self) -> exp.Cache: 2919 lazy = self._match_text_seq("LAZY") 2920 self._match(TokenType.TABLE) 2921 table = self._parse_table(schema=True) 2922 2923 options = [] 2924 if self._match_text_seq("OPTIONS"): 2925 self._match_l_paren() 2926 k = self._parse_string() 2927 self._match(TokenType.EQ) 2928 v = self._parse_string() 2929 options = [k, v] 2930 self._match_r_paren() 2931 2932 self._match(TokenType.ALIAS) 2933 return self.expression( 2934 exp.Cache, 2935 this=table, 2936 lazy=lazy, 2937 options=options, 2938 expression=self._parse_select(nested=True), 2939 ) 2940 2941 def _parse_partition(self) -> t.Optional[exp.Partition]: 2942 if not self._match(TokenType.PARTITION): 2943 return None 2944 2945 return self.expression( 2946 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2947 ) 2948 2949 def _parse_value(self) -> t.Optional[exp.Tuple]: 2950 def _parse_value_expression() -> t.Optional[exp.Expression]: 2951 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 2952 return exp.var(self._prev.text.upper()) 2953 return self._parse_expression() 2954 2955 if self._match(TokenType.L_PAREN): 2956 expressions = self._parse_csv(_parse_value_expression) 2957 self._match_r_paren() 2958 return self.expression(exp.Tuple, expressions=expressions) 2959 2960 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
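# i.e. each bare expression below becomes its own single-element exp.Tuple row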
2961 expression = self._parse_expression() 2962 if expression: 2963 return self.expression(exp.Tuple, expressions=[expression]) 2964 return None 2965 2966 def _parse_projections(self) -> t.List[exp.Expression]: 2967 return self._parse_expressions() 2968 2969 def _parse_select( 2970 self, 2971 nested: bool = False, 2972 table: bool = False, 2973 parse_subquery_alias: bool = True, 2974 parse_set_operation: bool = True, 2975 ) -> t.Optional[exp.Expression]: 2976 cte = self._parse_with() 2977 2978 if cte: 2979 this = self._parse_statement() 2980 2981 if not this: 2982 self.raise_error("Failed to parse any statement following CTE") 2983 return cte 2984 2985 if "with" in this.arg_types: 2986 this.set("with", cte) 2987 else: 2988 self.raise_error(f"{this.key} does not support CTE") 2989 this = cte 2990 2991 return this 2992 2993 # duckdb supports leading with FROM x 2994 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2995 2996 if self._match(TokenType.SELECT): 2997 comments = self._prev_comments 2998 2999 hint = self._parse_hint() 3000 3001 if self._next and not self._next.token_type == TokenType.DOT: 3002 all_ = self._match(TokenType.ALL) 3003 distinct = self._match_set(self.DISTINCT_TOKENS) 3004 else: 3005 all_, distinct = None, None 3006 3007 kind = ( 3008 self._match(TokenType.ALIAS) 3009 and self._match_texts(("STRUCT", "VALUE")) 3010 and self._prev.text.upper() 3011 ) 3012 3013 if distinct: 3014 distinct = self.expression( 3015 exp.Distinct, 3016 on=self._parse_value() if self._match(TokenType.ON) else None, 3017 ) 3018 3019 if all_ and distinct: 3020 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3021 3022 operation_modifiers = [] 3023 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3024 operation_modifiers.append(exp.var(self._prev.text.upper())) 3025 3026 limit = self._parse_limit(top=True) 3027 projections = self._parse_projections() 3028 3029 this = self.expression( 3030 exp.Select, 3031 kind=kind, 3032 hint=hint, 3033 distinct=distinct, 3034 expressions=projections, 3035 limit=limit, 3036 operation_modifiers=operation_modifiers or None, 3037 ) 3038 this.comments = comments 3039 3040 into = self._parse_into() 3041 if into: 3042 this.set("into", into) 3043 3044 if not from_: 3045 from_ = self._parse_from() 3046 3047 if from_: 3048 this.set("from", from_) 3049 3050 this = self._parse_query_modifiers(this) 3051 elif (table or nested) and self._match(TokenType.L_PAREN): 3052 if self._match(TokenType.PIVOT): 3053 this = self._parse_simplified_pivot() 3054 elif self._match(TokenType.FROM): 3055 this = exp.select("*").from_( 3056 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3057 ) 3058 else: 3059 this = ( 3060 self._parse_table() 3061 if table 3062 else self._parse_select(nested=True, parse_set_operation=False) 3063 ) 3064 3065 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3066 # in case a modifier (e.g. 
join) is following 3067 if table and isinstance(this, exp.Values) and this.alias: 3068 alias = this.args["alias"].pop() 3069 this = exp.Table(this=this, alias=alias) 3070 3071 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3072 3073 self._match_r_paren() 3074 3075 # We return early here so that the UNION isn't attached to the subquery by the 3076 # following call to _parse_set_operations, but instead becomes the parent node 3077 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3078 elif self._match(TokenType.VALUES, advance=False): 3079 this = self._parse_derived_table_values() 3080 elif from_: 3081 this = exp.select("*").from_(from_.this, copy=False) 3082 elif self._match(TokenType.SUMMARIZE): 3083 table = self._match(TokenType.TABLE) 3084 this = self._parse_select() or self._parse_string() or self._parse_table() 3085 return self.expression(exp.Summarize, this=this, table=table) 3086 elif self._match(TokenType.DESCRIBE): 3087 this = self._parse_describe() 3088 elif self._match_text_seq("STREAM"): 3089 this = self._parse_function() 3090 if this: 3091 this = self.expression(exp.Stream, this=this) 3092 else: 3093 self._retreat(self._index - 1) 3094 else: 3095 this = None 3096 3097 return self._parse_set_operations(this) if parse_set_operation else this 3098 3099 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3100 if not skip_with_token and not self._match(TokenType.WITH): 3101 return None 3102 3103 comments = self._prev_comments 3104 recursive = self._match(TokenType.RECURSIVE) 3105 3106 last_comments = None 3107 expressions = [] 3108 while True: 3109 expressions.append(self._parse_cte()) 3110 if last_comments: 3111 expressions[-1].add_comments(last_comments) 3112 3113 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3114 break 3115 else: 3116 self._match(TokenType.WITH) 3117 3118 last_comments = self._prev_comments 3119 3120 return self.expression( 3121 exp.With, comments=comments, expressions=expressions, recursive=recursive 3122 ) 3123 3124 def _parse_cte(self) -> exp.CTE: 3125 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3126 if not alias or not alias.this: 3127 self.raise_error("Expected CTE to have alias") 3128 3129 self._match(TokenType.ALIAS) 3130 comments = self._prev_comments 3131 3132 if self._match_text_seq("NOT", "MATERIALIZED"): 3133 materialized = False 3134 elif self._match_text_seq("MATERIALIZED"): 3135 materialized = True 3136 else: 3137 materialized = None 3138 3139 return self.expression( 3140 exp.CTE, 3141 this=self._parse_wrapped(self._parse_statement), 3142 alias=alias, 3143 materialized=materialized, 3144 comments=comments, 3145 ) 3146 3147 def _parse_table_alias( 3148 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3149 ) -> t.Optional[exp.TableAlias]: 3150 any_token = self._match(TokenType.ALIAS) 3151 alias = ( 3152 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3153 or self._parse_string_as_identifier() 3154 ) 3155 3156 index = self._index 3157 if self._match(TokenType.L_PAREN): 3158 columns = self._parse_csv(self._parse_function_parameter) 3159 self._match_r_paren() if columns else self._retreat(index) 3160 else: 3161 columns = None 3162 3163 if not alias and not columns: 3164 return None 3165 3166 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3167 3168 # We bubble up comments from the Identifier to the TableAlias 3169 if isinstance(alias, exp.Identifier): 3170 
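# pop_comments detaches them from the Identifier so the TableAlias carries them instead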
table_alias.add_comments(alias.pop_comments()) 3171 3172 return table_alias 3173 3174 def _parse_subquery( 3175 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3176 ) -> t.Optional[exp.Subquery]: 3177 if not this: 3178 return None 3179 3180 return self.expression( 3181 exp.Subquery, 3182 this=this, 3183 pivots=self._parse_pivots(), 3184 alias=self._parse_table_alias() if parse_alias else None, 3185 sample=self._parse_table_sample(), 3186 ) 3187 3188 def _implicit_unnests_to_explicit(self, this: E) -> E: 3189 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3190 3191 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3192 for i, join in enumerate(this.args.get("joins") or []): 3193 table = join.this 3194 normalized_table = table.copy() 3195 normalized_table.meta["maybe_column"] = True 3196 normalized_table = _norm(normalized_table, dialect=self.dialect) 3197 3198 if isinstance(table, exp.Table) and not join.args.get("on"): 3199 if normalized_table.parts[0].name in refs: 3200 table_as_column = table.to_column() 3201 unnest = exp.Unnest(expressions=[table_as_column]) 3202 3203 # Table.to_column creates a parent Alias node that we want to convert to 3204 # a TableAlias and attach to the Unnest, so it matches the parser's output 3205 if isinstance(table.args.get("alias"), exp.TableAlias): 3206 table_as_column.replace(table_as_column.this) 3207 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3208 3209 table.replace(unnest) 3210 3211 refs.add(normalized_table.alias_or_name) 3212 3213 return this 3214 3215 def _parse_query_modifiers( 3216 self, this: t.Optional[exp.Expression] 3217 ) -> t.Optional[exp.Expression]: 3218 if isinstance(this, (exp.Query, exp.Table)): 3219 for join in self._parse_joins(): 3220 this.append("joins", join) 3221 for lateral in iter(self._parse_lateral, None): 3222 this.append("laterals", lateral) 3223 3224 while True: 3225 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3226 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3227 key, expression = parser(self) 3228 3229 if expression: 3230 this.set(key, expression) 3231 if key == "limit": 3232 offset = expression.args.pop("offset", None) 3233 3234 if offset: 3235 offset = exp.Offset(expression=offset) 3236 this.set("offset", offset) 3237 3238 limit_by_expressions = expression.expressions 3239 expression.set("expressions", None) 3240 offset.set("expressions", limit_by_expressions) 3241 continue 3242 break 3243 3244 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3245 this = self._implicit_unnests_to_explicit(this) 3246 3247 return this 3248 3249 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3250 start = self._curr 3251 while self._curr: 3252 self._advance() 3253 3254 end = self._tokens[self._index - 1] 3255 return exp.Hint(expressions=[self._find_sql(start, end)]) 3256 3257 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3258 return self._parse_function_call() 3259 3260 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3261 start_index = self._index 3262 should_fallback_to_string = False 3263 3264 hints = [] 3265 try: 3266 for hint in iter( 3267 lambda: self._parse_csv( 3268 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3269 ), 3270 [], 3271 ): 3272 hints.extend(hint) 3273 except ParseError: 3274 should_fallback_to_string = True 3275 3276 if should_fallback_to_string or self._curr: 3277 
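# either parsing raised or tokens were left unconsumed, so retreat and re-read the whole hint as a raw string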
self._retreat(start_index) 3278 return self._parse_hint_fallback_to_string() 3279 3280 return self.expression(exp.Hint, expressions=hints) 3281 3282 def _parse_hint(self) -> t.Optional[exp.Hint]: 3283 if self._match(TokenType.HINT) and self._prev_comments: 3284 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3285 3286 return None 3287 3288 def _parse_into(self) -> t.Optional[exp.Into]: 3289 if not self._match(TokenType.INTO): 3290 return None 3291 3292 temp = self._match(TokenType.TEMPORARY) 3293 unlogged = self._match_text_seq("UNLOGGED") 3294 self._match(TokenType.TABLE) 3295 3296 return self.expression( 3297 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3298 ) 3299 3300 def _parse_from( 3301 self, joins: bool = False, skip_from_token: bool = False 3302 ) -> t.Optional[exp.From]: 3303 if not skip_from_token and not self._match(TokenType.FROM): 3304 return None 3305 3306 return self.expression( 3307 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3308 ) 3309 3310 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3311 return self.expression( 3312 exp.MatchRecognizeMeasure, 3313 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3314 this=self._parse_expression(), 3315 ) 3316 3317 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3318 if not self._match(TokenType.MATCH_RECOGNIZE): 3319 return None 3320 3321 self._match_l_paren() 3322 3323 partition = self._parse_partition_by() 3324 order = self._parse_order() 3325 3326 measures = ( 3327 self._parse_csv(self._parse_match_recognize_measure) 3328 if self._match_text_seq("MEASURES") 3329 else None 3330 ) 3331 3332 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3333 rows = exp.var("ONE ROW PER MATCH") 3334 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3335 text = "ALL ROWS PER MATCH" 3336 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3337 text += " SHOW EMPTY MATCHES" 3338 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3339 text += " OMIT EMPTY MATCHES" 3340 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3341 text += " WITH UNMATCHED ROWS" 3342 rows = exp.var(text) 3343 else: 3344 rows = None 3345 3346 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3347 text = "AFTER MATCH SKIP" 3348 if self._match_text_seq("PAST", "LAST", "ROW"): 3349 text += " PAST LAST ROW" 3350 elif self._match_text_seq("TO", "NEXT", "ROW"): 3351 text += " TO NEXT ROW" 3352 elif self._match_text_seq("TO", "FIRST"): 3353 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3354 elif self._match_text_seq("TO", "LAST"): 3355 text += f" TO LAST {self._advance_any().text}" # type: ignore 3356 after = exp.var(text) 3357 else: 3358 after = None 3359 3360 if self._match_text_seq("PATTERN"): 3361 self._match_l_paren() 3362 3363 if not self._curr: 3364 self.raise_error("Expecting )", self._curr) 3365 3366 paren = 1 3367 start = self._curr 3368 3369 while self._curr and paren > 0: 3370 if self._curr.token_type == TokenType.L_PAREN: 3371 paren += 1 3372 if self._curr.token_type == TokenType.R_PAREN: 3373 paren -= 1 3374 3375 end = self._prev 3376 self._advance() 3377 3378 if paren > 0: 3379 self.raise_error("Expecting )", self._curr) 3380 3381 pattern = exp.var(self._find_sql(start, end)) 3382 else: 3383 pattern = None 3384 3385 define = ( 3386 self._parse_csv(self._parse_name_as_expression) 3387 if self._match_text_seq("DEFINE") 3388 else None 3389 ) 3390 3391 
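# Illustrative example of a clause this method accepts end to end (not from the source):
#
#   SELECT * FROM t MATCH_RECOGNIZE (
#     PARTITION BY a ORDER BY b
#     MEASURES FINAL LAST(x) AS last_x
#     ONE ROW PER MATCH
#     AFTER MATCH SKIP PAST LAST ROW
#     PATTERN (A B*)
#     DEFINE A AS x > 0, B AS x < 0
#   ) AS mr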
self._match_r_paren() 3392 3393 return self.expression( 3394 exp.MatchRecognize, 3395 partition_by=partition, 3396 order=order, 3397 measures=measures, 3398 rows=rows, 3399 after=after, 3400 pattern=pattern, 3401 define=define, 3402 alias=self._parse_table_alias(), 3403 ) 3404 3405 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3406 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3407 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3408 cross_apply = False 3409 3410 if cross_apply is not None: 3411 this = self._parse_select(table=True) 3412 view = None 3413 outer = None 3414 elif self._match(TokenType.LATERAL): 3415 this = self._parse_select(table=True) 3416 view = self._match(TokenType.VIEW) 3417 outer = self._match(TokenType.OUTER) 3418 else: 3419 return None 3420 3421 if not this: 3422 this = ( 3423 self._parse_unnest() 3424 or self._parse_function() 3425 or self._parse_id_var(any_token=False) 3426 ) 3427 3428 while self._match(TokenType.DOT): 3429 this = exp.Dot( 3430 this=this, 3431 expression=self._parse_function() or self._parse_id_var(any_token=False), 3432 ) 3433 3434 if view: 3435 table = self._parse_id_var(any_token=False) 3436 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3437 table_alias: t.Optional[exp.TableAlias] = self.expression( 3438 exp.TableAlias, this=table, columns=columns 3439 ) 3440 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3441 # We move the alias from the lateral's child node to the lateral itself 3442 table_alias = this.args["alias"].pop() 3443 else: 3444 table_alias = self._parse_table_alias() 3445 3446 return self.expression( 3447 exp.Lateral, 3448 this=this, 3449 view=view, 3450 outer=outer, 3451 alias=table_alias, 3452 cross_apply=cross_apply, 3453 ) 3454 3455 def _parse_join_parts( 3456 self, 3457 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3458 return ( 3459 self._match_set(self.JOIN_METHODS) and self._prev, 3460 self._match_set(self.JOIN_SIDES) and self._prev, 3461 self._match_set(self.JOIN_KINDS) and self._prev, 3462 ) 3463 3464 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3465 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3466 this = self._parse_column() 3467 if isinstance(this, exp.Column): 3468 return this.this 3469 return this 3470 3471 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3472 3473 def _parse_join( 3474 self, skip_join_token: bool = False, parse_bracket: bool = False 3475 ) -> t.Optional[exp.Join]: 3476 if self._match(TokenType.COMMA): 3477 return self.expression(exp.Join, this=self._parse_table()) 3478 3479 index = self._index 3480 method, side, kind = self._parse_join_parts() 3481 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3482 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3483 3484 if not skip_join_token and not join: 3485 self._retreat(index) 3486 kind = None 3487 method = None 3488 side = None 3489 3490 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3491 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3492 3493 if not skip_join_token and not join and not outer_apply and not cross_apply: 3494 return None 3495 3496 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3497 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3498 kwargs["expressions"] = 
self._parse_csv( 3499 lambda: self._parse_table(parse_bracket=parse_bracket) 3500 ) 3501 3502 if method: 3503 kwargs["method"] = method.text 3504 if side: 3505 kwargs["side"] = side.text 3506 if kind: 3507 kwargs["kind"] = kind.text 3508 if hint: 3509 kwargs["hint"] = hint 3510 3511 if self._match(TokenType.MATCH_CONDITION): 3512 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3513 3514 if self._match(TokenType.ON): 3515 kwargs["on"] = self._parse_assignment() 3516 elif self._match(TokenType.USING): 3517 kwargs["using"] = self._parse_using_identifiers() 3518 elif ( 3519 not (outer_apply or cross_apply) 3520 and not isinstance(kwargs["this"], exp.Unnest) 3521 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3522 ): 3523 index = self._index 3524 joins: t.Optional[list] = list(self._parse_joins()) 3525 3526 if joins and self._match(TokenType.ON): 3527 kwargs["on"] = self._parse_assignment() 3528 elif joins and self._match(TokenType.USING): 3529 kwargs["using"] = self._parse_using_identifiers() 3530 else: 3531 joins = None 3532 self._retreat(index) 3533 3534 kwargs["this"].set("joins", joins if joins else None) 3535 3536 comments = [c for token in (method, side, kind) if token for c in token.comments] 3537 return self.expression(exp.Join, comments=comments, **kwargs) 3538 3539 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3540 this = self._parse_assignment() 3541 3542 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3543 return this 3544 3545 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3546 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3547 3548 return this 3549 3550 def _parse_index_params(self) -> exp.IndexParameters: 3551 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3552 3553 if self._match(TokenType.L_PAREN, advance=False): 3554 columns = self._parse_wrapped_csv(self._parse_with_operator) 3555 else: 3556 columns = None 3557 3558 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3559 partition_by = self._parse_partition_by() 3560 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3561 tablespace = ( 3562 self._parse_var(any_token=True) 3563 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3564 else None 3565 ) 3566 where = self._parse_where() 3567 3568 on = self._parse_field() if self._match(TokenType.ON) else None 3569 3570 return self.expression( 3571 exp.IndexParameters, 3572 using=using, 3573 columns=columns, 3574 include=include, 3575 partition_by=partition_by, 3576 where=where, 3577 with_storage=with_storage, 3578 tablespace=tablespace, 3579 on=on, 3580 ) 3581 3582 def _parse_index( 3583 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3584 ) -> t.Optional[exp.Index]: 3585 if index or anonymous: 3586 unique = None 3587 primary = None 3588 amp = None 3589 3590 self._match(TokenType.ON) 3591 self._match(TokenType.TABLE) # hive 3592 table = self._parse_table_parts(schema=True) 3593 else: 3594 unique = self._match(TokenType.UNIQUE) 3595 primary = self._match_text_seq("PRIMARY") 3596 amp = self._match_text_seq("AMP") 3597 3598 if not self._match(TokenType.INDEX): 3599 return None 3600 3601 index = self._parse_id_var() 3602 table = None 3603 3604 params = self._parse_index_params() 3605 3606 return self.expression( 3607 exp.Index, 3608 this=index, 3609 table=table, 3610 unique=unique, 3611 primary=primary, 3612 amp=amp, 3613 
params=params, 3614 ) 3615 3616 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3617 hints: t.List[exp.Expression] = [] 3618 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3619 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3620 hints.append( 3621 self.expression( 3622 exp.WithTableHint, 3623 expressions=self._parse_csv( 3624 lambda: self._parse_function() or self._parse_var(any_token=True) 3625 ), 3626 ) 3627 ) 3628 self._match_r_paren() 3629 else: 3630 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3631 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3632 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3633 3634 self._match_set((TokenType.INDEX, TokenType.KEY)) 3635 if self._match(TokenType.FOR): 3636 hint.set("target", self._advance_any() and self._prev.text.upper()) 3637 3638 hint.set("expressions", self._parse_wrapped_id_vars()) 3639 hints.append(hint) 3640 3641 return hints or None 3642 3643 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3644 return ( 3645 (not schema and self._parse_function(optional_parens=False)) 3646 or self._parse_id_var(any_token=False) 3647 or self._parse_string_as_identifier() 3648 or self._parse_placeholder() 3649 ) 3650 3651 def _parse_table_parts( 3652 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3653 ) -> exp.Table: 3654 catalog = None 3655 db = None 3656 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3657 3658 while self._match(TokenType.DOT): 3659 if catalog: 3660 # This allows nesting the table in arbitrarily many dot expressions if needed 3661 table = self.expression( 3662 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3663 ) 3664 else: 3665 catalog = db 3666 db = table 3667 # "" used for tsql FROM a..b case 3668 table = self._parse_table_part(schema=schema) or "" 3669 3670 if ( 3671 wildcard 3672 and self._is_connected() 3673 and (isinstance(table, exp.Identifier) or not table) 3674 and self._match(TokenType.STAR) 3675 ): 3676 if isinstance(table, exp.Identifier): 3677 table.args["this"] += "*" 3678 else: 3679 table = exp.Identifier(this="*") 3680 3681 # We bubble up comments from the Identifier to the Table 3682 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3683 3684 if is_db_reference: 3685 catalog = db 3686 db = table 3687 table = None 3688 3689 if not table and not is_db_reference: 3690 self.raise_error(f"Expected table name but got {self._curr}") 3691 if not db and is_db_reference: 3692 self.raise_error(f"Expected database name but got {self._curr}") 3693 3694 table = self.expression( 3695 exp.Table, 3696 comments=comments, 3697 this=table, 3698 db=db, 3699 catalog=catalog, 3700 ) 3701 3702 changes = self._parse_changes() 3703 if changes: 3704 table.set("changes", changes) 3705 3706 at_before = self._parse_historical_data() 3707 if at_before: 3708 table.set("when", at_before) 3709 3710 pivots = self._parse_pivots() 3711 if pivots: 3712 table.set("pivots", pivots) 3713 3714 return table 3715 3716 def _parse_table( 3717 self, 3718 schema: bool = False, 3719 joins: bool = False, 3720 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3721 parse_bracket: bool = False, 3722 is_db_reference: bool = False, 3723 parse_partition: bool = False, 3724 ) -> t.Optional[exp.Expression]: 3725 lateral = self._parse_lateral() 3726 if lateral: 3727 return lateral 3728 3729 unnest = 
self._parse_unnest() 3730 if unnest: 3731 return unnest 3732 3733 values = self._parse_derived_table_values() 3734 if values: 3735 return values 3736 3737 subquery = self._parse_select(table=True) 3738 if subquery: 3739 if not subquery.args.get("pivots"): 3740 subquery.set("pivots", self._parse_pivots()) 3741 return subquery 3742 3743 bracket = parse_bracket and self._parse_bracket(None) 3744 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3745 3746 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3747 self._parse_table 3748 ) 3749 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3750 3751 only = self._match(TokenType.ONLY) 3752 3753 this = t.cast( 3754 exp.Expression, 3755 bracket 3756 or rows_from 3757 or self._parse_bracket( 3758 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3759 ), 3760 ) 3761 3762 if only: 3763 this.set("only", only) 3764 3765 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3766 self._match_text_seq("*") 3767 3768 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3769 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3770 this.set("partition", self._parse_partition()) 3771 3772 if schema: 3773 return self._parse_schema(this=this) 3774 3775 version = self._parse_version() 3776 3777 if version: 3778 this.set("version", version) 3779 3780 if self.dialect.ALIAS_POST_TABLESAMPLE: 3781 this.set("sample", self._parse_table_sample()) 3782 3783 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3784 if alias: 3785 this.set("alias", alias) 3786 3787 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3788 return self.expression( 3789 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3790 ) 3791 3792 this.set("hints", self._parse_table_hints()) 3793 3794 if not this.args.get("pivots"): 3795 this.set("pivots", self._parse_pivots()) 3796 3797 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3798 this.set("sample", self._parse_table_sample()) 3799 3800 if joins: 3801 for join in self._parse_joins(): 3802 this.append("joins", join) 3803 3804 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3805 this.set("ordinality", True) 3806 this.set("alias", self._parse_table_alias()) 3807 3808 return this 3809 3810 def _parse_version(self) -> t.Optional[exp.Version]: 3811 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3812 this = "TIMESTAMP" 3813 elif self._match(TokenType.VERSION_SNAPSHOT): 3814 this = "VERSION" 3815 else: 3816 return None 3817 3818 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3819 kind = self._prev.text.upper() 3820 start = self._parse_bitwise() 3821 self._match_texts(("TO", "AND")) 3822 end = self._parse_bitwise() 3823 expression: t.Optional[exp.Expression] = self.expression( 3824 exp.Tuple, expressions=[start, end] 3825 ) 3826 elif self._match_text_seq("CONTAINED", "IN"): 3827 kind = "CONTAINED IN" 3828 expression = self.expression( 3829 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3830 ) 3831 elif self._match(TokenType.ALL): 3832 kind = "ALL" 3833 expression = None 3834 else: 3835 self._match_text_seq("AS", "OF") 3836 kind = "AS OF" 3837 expression = self._parse_type() 3838 3839 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3840 3841 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3842 # 
https://docs.snowflake.com/en/sql-reference/constructs/at-before 3843 index = self._index 3844 historical_data = None 3845 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3846 this = self._prev.text.upper() 3847 kind = ( 3848 self._match(TokenType.L_PAREN) 3849 and self._match_texts(self.HISTORICAL_DATA_KIND) 3850 and self._prev.text.upper() 3851 ) 3852 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3853 3854 if expression: 3855 self._match_r_paren() 3856 historical_data = self.expression( 3857 exp.HistoricalData, this=this, kind=kind, expression=expression 3858 ) 3859 else: 3860 self._retreat(index) 3861 3862 return historical_data 3863 3864 def _parse_changes(self) -> t.Optional[exp.Changes]: 3865 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3866 return None 3867 3868 information = self._parse_var(any_token=True) 3869 self._match_r_paren() 3870 3871 return self.expression( 3872 exp.Changes, 3873 information=information, 3874 at_before=self._parse_historical_data(), 3875 end=self._parse_historical_data(), 3876 ) 3877 3878 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3879 if not self._match(TokenType.UNNEST): 3880 return None 3881 3882 expressions = self._parse_wrapped_csv(self._parse_equality) 3883 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3884 3885 alias = self._parse_table_alias() if with_alias else None 3886 3887 if alias: 3888 if self.dialect.UNNEST_COLUMN_ONLY: 3889 if alias.args.get("columns"): 3890 self.raise_error("Unexpected extra column alias in unnest.") 3891 3892 alias.set("columns", [alias.this]) 3893 alias.set("this", None) 3894 3895 columns = alias.args.get("columns") or [] 3896 if offset and len(expressions) < len(columns): 3897 offset = columns.pop() 3898 3899 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3900 self._match(TokenType.ALIAS) 3901 offset = self._parse_id_var( 3902 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3903 ) or exp.to_identifier("offset") 3904 3905 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3906 3907 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3908 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3909 if not is_derived and not ( 3910 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3911 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3912 ): 3913 return None 3914 3915 expressions = self._parse_csv(self._parse_value) 3916 alias = self._parse_table_alias() 3917 3918 if is_derived: 3919 self._match_r_paren() 3920 3921 return self.expression( 3922 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3923 ) 3924 3925 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3926 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3927 as_modifier and self._match_text_seq("USING", "SAMPLE") 3928 ): 3929 return None 3930 3931 bucket_numerator = None 3932 bucket_denominator = None 3933 bucket_field = None 3934 percent = None 3935 size = None 3936 seed = None 3937 3938 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3939 matched_l_paren = self._match(TokenType.L_PAREN) 3940 3941 if self.TABLESAMPLE_CSV: 3942 num = None 3943 expressions = self._parse_csv(self._parse_primary) 3944 else: 3945 expressions = None 3946 num = ( 3947 self._parse_factor() 3948 if self._match(TokenType.NUMBER, advance=False) 3949 else self._parse_primary() or 
self._parse_placeholder()
3950             )
3951
3952         if self._match_text_seq("BUCKET"):
3953             bucket_numerator = self._parse_number()
3954             self._match_text_seq("OUT", "OF")
3955             bucket_denominator = self._parse_number()
3956             self._match(TokenType.ON)
3957             bucket_field = self._parse_field()
3958         elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3959             percent = num
3960         elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
3961             size = num
3962         else:
3963             percent = num
3964
3965         if matched_l_paren:
3966             self._match_r_paren()
3967
3968         if self._match(TokenType.L_PAREN):
3969             method = self._parse_var(upper=True)
3970             seed = self._match(TokenType.COMMA) and self._parse_number()
3971             self._match_r_paren()
3972         elif self._match_texts(("SEED", "REPEATABLE")):
3973             seed = self._parse_wrapped(self._parse_number)
3974
3975         if not method and self.DEFAULT_SAMPLING_METHOD:
3976             method = exp.var(self.DEFAULT_SAMPLING_METHOD)
3977
3978         return self.expression(
3979             exp.TableSample,
3980             expressions=expressions,
3981             method=method,
3982             bucket_numerator=bucket_numerator,
3983             bucket_denominator=bucket_denominator,
3984             bucket_field=bucket_field,
3985             percent=percent,
3986             size=size,
3987             seed=seed,
3988         )
3989
3990     def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
3991         return list(iter(self._parse_pivot, None)) or None
3992
3993     def _parse_joins(self) -> t.Iterator[exp.Join]:
3994         return iter(self._parse_join, None)
3995
3996     # https://duckdb.org/docs/sql/statements/pivot
3997     def _parse_simplified_pivot(self) -> exp.Pivot:
3998         def _parse_on() -> t.Optional[exp.Expression]:
3999             this = self._parse_bitwise()
4000             return self._parse_in(this) if self._match(TokenType.IN) else this
4001
4002         this = self._parse_table()
4003         expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
4004         using = self._match(TokenType.USING) and self._parse_csv(
4005             lambda: self._parse_alias(self._parse_function())
4006         )
4007         group = self._parse_group()
4008         return self.expression(
4009             exp.Pivot, this=this, expressions=expressions, using=using, group=group
4010         )
4011
4012     def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
4013         def _parse_aliased_expression() -> t.Optional[exp.Expression]:
4014             this = self._parse_select_or_expression()
4015
4016             self._match(TokenType.ALIAS)
4017             alias = self._parse_bitwise()
4018             if alias:
4019                 if isinstance(alias, exp.Column) and not alias.db:
4020                     alias = alias.this
4021                 return self.expression(exp.PivotAlias, this=this, alias=alias)
4022
4023             return this
4024
4025         value = self._parse_column()
4026
4027         if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
4028             self.raise_error("Expecting IN (")
4029
4030         if self._match(TokenType.ANY):
4031             exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
4032         else:
4033             exprs = self._parse_csv(_parse_aliased_expression)
4034
4035         self._match_r_paren()
4036         return self.expression(exp.In, this=value, expressions=exprs)
4037
4038     def _parse_pivot(self) -> t.Optional[exp.Pivot]:
4039         index = self._index
4040         include_nulls = None
4041
4042         if self._match(TokenType.PIVOT):
4043             unpivot = False
4044         elif self._match(TokenType.UNPIVOT):
4045             unpivot = True
4046
4047             # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
4048             if self._match_text_seq("INCLUDE", "NULLS"):
4049                 include_nulls = True
4050             elif self._match_text_seq("EXCLUDE", "NULLS"):
4051                 include_nulls = False
4052         else:
4053             return None
4054
4055         expressions
= [] 4056 4057 if not self._match(TokenType.L_PAREN): 4058 self._retreat(index) 4059 return None 4060 4061 if unpivot: 4062 expressions = self._parse_csv(self._parse_column) 4063 else: 4064 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4065 4066 if not expressions: 4067 self.raise_error("Failed to parse PIVOT's aggregation list") 4068 4069 if not self._match(TokenType.FOR): 4070 self.raise_error("Expecting FOR") 4071 4072 field = self._parse_pivot_in() 4073 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4074 self._parse_bitwise 4075 ) 4076 4077 self._match_r_paren() 4078 4079 pivot = self.expression( 4080 exp.Pivot, 4081 expressions=expressions, 4082 field=field, 4083 unpivot=unpivot, 4084 include_nulls=include_nulls, 4085 default_on_null=default_on_null, 4086 ) 4087 4088 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4089 pivot.set("alias", self._parse_table_alias()) 4090 4091 if not unpivot: 4092 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4093 4094 columns: t.List[exp.Expression] = [] 4095 for fld in pivot.args["field"].expressions: 4096 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4097 for name in names: 4098 if self.PREFIXED_PIVOT_COLUMNS: 4099 name = f"{name}_{field_name}" if name else field_name 4100 else: 4101 name = f"{field_name}_{name}" if name else field_name 4102 4103 columns.append(exp.to_identifier(name)) 4104 4105 pivot.set("columns", columns) 4106 4107 return pivot 4108 4109 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4110 return [agg.alias for agg in aggregations] 4111 4112 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4113 if not skip_where_token and not self._match(TokenType.PREWHERE): 4114 return None 4115 4116 return self.expression( 4117 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4118 ) 4119 4120 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4121 if not skip_where_token and not self._match(TokenType.WHERE): 4122 return None 4123 4124 return self.expression( 4125 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4126 ) 4127 4128 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4129 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4130 return None 4131 4132 elements: t.Dict[str, t.Any] = defaultdict(list) 4133 4134 if self._match(TokenType.ALL): 4135 elements["all"] = True 4136 elif self._match(TokenType.DISTINCT): 4137 elements["all"] = False 4138 4139 while True: 4140 index = self._index 4141 4142 elements["expressions"].extend( 4143 self._parse_csv( 4144 lambda: None 4145 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4146 else self._parse_assignment() 4147 ) 4148 ) 4149 4150 before_with_index = self._index 4151 with_prefix = self._match(TokenType.WITH) 4152 4153 if self._match(TokenType.ROLLUP): 4154 elements["rollup"].append( 4155 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4156 ) 4157 elif self._match(TokenType.CUBE): 4158 elements["cube"].append( 4159 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4160 ) 4161 elif self._match(TokenType.GROUPING_SETS): 4162 elements["grouping_sets"].append( 4163 self.expression( 4164 exp.GroupingSets, 4165 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4166 ) 4167 ) 
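            # The next branch handles ClickHouse's totals modifier, e.g.
            # GROUP BY a, b WITH TOTALS (illustrative example, not from the source)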
4168 elif self._match_text_seq("TOTALS"): 4169 elements["totals"] = True # type: ignore 4170 4171 if before_with_index <= self._index <= before_with_index + 1: 4172 self._retreat(before_with_index) 4173 break 4174 4175 if index == self._index: 4176 break 4177 4178 return self.expression(exp.Group, **elements) # type: ignore 4179 4180 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4181 return self.expression( 4182 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4183 ) 4184 4185 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4186 if self._match(TokenType.L_PAREN): 4187 grouping_set = self._parse_csv(self._parse_column) 4188 self._match_r_paren() 4189 return self.expression(exp.Tuple, expressions=grouping_set) 4190 4191 return self._parse_column() 4192 4193 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4194 if not skip_having_token and not self._match(TokenType.HAVING): 4195 return None 4196 return self.expression(exp.Having, this=self._parse_assignment()) 4197 4198 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4199 if not self._match(TokenType.QUALIFY): 4200 return None 4201 return self.expression(exp.Qualify, this=self._parse_assignment()) 4202 4203 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4204 if skip_start_token: 4205 start = None 4206 elif self._match(TokenType.START_WITH): 4207 start = self._parse_assignment() 4208 else: 4209 return None 4210 4211 self._match(TokenType.CONNECT_BY) 4212 nocycle = self._match_text_seq("NOCYCLE") 4213 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4214 exp.Prior, this=self._parse_bitwise() 4215 ) 4216 connect = self._parse_assignment() 4217 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4218 4219 if not start and self._match(TokenType.START_WITH): 4220 start = self._parse_assignment() 4221 4222 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4223 4224 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4225 this = self._parse_id_var(any_token=True) 4226 if self._match(TokenType.ALIAS): 4227 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4228 return this 4229 4230 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4231 if self._match_text_seq("INTERPOLATE"): 4232 return self._parse_wrapped_csv(self._parse_name_as_expression) 4233 return None 4234 4235 def _parse_order( 4236 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4237 ) -> t.Optional[exp.Expression]: 4238 siblings = None 4239 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4240 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4241 return this 4242 4243 siblings = True 4244 4245 return self.expression( 4246 exp.Order, 4247 this=this, 4248 expressions=self._parse_csv(self._parse_ordered), 4249 siblings=siblings, 4250 ) 4251 4252 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4253 if not self._match(token): 4254 return None 4255 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4256 4257 def _parse_ordered( 4258 self, parse_method: t.Optional[t.Callable] = None 4259 ) -> t.Optional[exp.Ordered]: 4260 this = parse_method() if parse_method else self._parse_assignment() 4261 if not this: 4262 return None 4263 4264 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4265 this = 
exp.var("ALL") 4266 4267 asc = self._match(TokenType.ASC) 4268 desc = self._match(TokenType.DESC) or (asc and False) 4269 4270 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4271 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4272 4273 nulls_first = is_nulls_first or False 4274 explicitly_null_ordered = is_nulls_first or is_nulls_last 4275 4276 if ( 4277 not explicitly_null_ordered 4278 and ( 4279 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4280 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4281 ) 4282 and self.dialect.NULL_ORDERING != "nulls_are_last" 4283 ): 4284 nulls_first = True 4285 4286 if self._match_text_seq("WITH", "FILL"): 4287 with_fill = self.expression( 4288 exp.WithFill, 4289 **{ # type: ignore 4290 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4291 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4292 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4293 "interpolate": self._parse_interpolate(), 4294 }, 4295 ) 4296 else: 4297 with_fill = None 4298 4299 return self.expression( 4300 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4301 ) 4302 4303 def _parse_limit( 4304 self, 4305 this: t.Optional[exp.Expression] = None, 4306 top: bool = False, 4307 skip_limit_token: bool = False, 4308 ) -> t.Optional[exp.Expression]: 4309 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4310 comments = self._prev_comments 4311 if top: 4312 limit_paren = self._match(TokenType.L_PAREN) 4313 expression = self._parse_term() if limit_paren else self._parse_number() 4314 4315 if limit_paren: 4316 self._match_r_paren() 4317 else: 4318 expression = self._parse_term() 4319 4320 if self._match(TokenType.COMMA): 4321 offset = expression 4322 expression = self._parse_term() 4323 else: 4324 offset = None 4325 4326 limit_exp = self.expression( 4327 exp.Limit, 4328 this=this, 4329 expression=expression, 4330 offset=offset, 4331 comments=comments, 4332 expressions=self._parse_limit_by(), 4333 ) 4334 4335 return limit_exp 4336 4337 if self._match(TokenType.FETCH): 4338 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4339 direction = self._prev.text.upper() if direction else "FIRST" 4340 4341 count = self._parse_field(tokens=self.FETCH_TOKENS) 4342 percent = self._match(TokenType.PERCENT) 4343 4344 self._match_set((TokenType.ROW, TokenType.ROWS)) 4345 4346 only = self._match_text_seq("ONLY") 4347 with_ties = self._match_text_seq("WITH", "TIES") 4348 4349 if only and with_ties: 4350 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4351 4352 return self.expression( 4353 exp.Fetch, 4354 direction=direction, 4355 count=count, 4356 percent=percent, 4357 with_ties=with_ties, 4358 ) 4359 4360 return this 4361 4362 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4363 if not self._match(TokenType.OFFSET): 4364 return this 4365 4366 count = self._parse_term() 4367 self._match_set((TokenType.ROW, TokenType.ROWS)) 4368 4369 return self.expression( 4370 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4371 ) 4372 4373 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4374 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4375 4376 def _parse_locks(self) -> t.List[exp.Lock]: 4377 locks = [] 4378 while True: 4379 if self._match_text_seq("FOR", "UPDATE"): 4380 update = True 4381 elif self._match_text_seq("FOR", "SHARE") 
or self._match_text_seq( 4382 "LOCK", "IN", "SHARE", "MODE" 4383 ): 4384 update = False 4385 else: 4386 break 4387 4388 expressions = None 4389 if self._match_text_seq("OF"): 4390 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4391 4392 wait: t.Optional[bool | exp.Expression] = None 4393 if self._match_text_seq("NOWAIT"): 4394 wait = True 4395 elif self._match_text_seq("WAIT"): 4396 wait = self._parse_primary() 4397 elif self._match_text_seq("SKIP", "LOCKED"): 4398 wait = False 4399 4400 locks.append( 4401 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4402 ) 4403 4404 return locks 4405 4406 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4407 while this and self._match_set(self.SET_OPERATIONS): 4408 token_type = self._prev.token_type 4409 4410 if token_type == TokenType.UNION: 4411 operation: t.Type[exp.SetOperation] = exp.Union 4412 elif token_type == TokenType.EXCEPT: 4413 operation = exp.Except 4414 else: 4415 operation = exp.Intersect 4416 4417 comments = self._prev.comments 4418 4419 if self._match(TokenType.DISTINCT): 4420 distinct: t.Optional[bool] = True 4421 elif self._match(TokenType.ALL): 4422 distinct = False 4423 else: 4424 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4425 if distinct is None: 4426 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4427 4428 by_name = self._match_text_seq("BY", "NAME") 4429 expression = self._parse_select(nested=True, parse_set_operation=False) 4430 4431 this = self.expression( 4432 operation, 4433 comments=comments, 4434 this=this, 4435 distinct=distinct, 4436 by_name=by_name, 4437 expression=expression, 4438 ) 4439 4440 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4441 expression = this.expression 4442 4443 if expression: 4444 for arg in self.SET_OP_MODIFIERS: 4445 expr = expression.args.get(arg) 4446 if expr: 4447 this.set(arg, expr.pop()) 4448 4449 return this 4450 4451 def _parse_expression(self) -> t.Optional[exp.Expression]: 4452 return self._parse_alias(self._parse_assignment()) 4453 4454 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4455 this = self._parse_disjunction() 4456 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4457 # This allows us to parse <non-identifier token> := <expr> 4458 this = exp.column( 4459 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4460 ) 4461 4462 while self._match_set(self.ASSIGNMENT): 4463 if isinstance(this, exp.Column) and len(this.parts) == 1: 4464 this = this.this 4465 4466 this = self.expression( 4467 self.ASSIGNMENT[self._prev.token_type], 4468 this=this, 4469 comments=self._prev_comments, 4470 expression=self._parse_assignment(), 4471 ) 4472 4473 return this 4474 4475 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4476 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4477 4478 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4479 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4480 4481 def _parse_equality(self) -> t.Optional[exp.Expression]: 4482 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4483 4484 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4485 return self._parse_tokens(self._parse_range, self.COMPARISON) 4486 4487 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4488 this = this or self._parse_bitwise() 4489 negate = 
self._match(TokenType.NOT) 4490 4491 if self._match_set(self.RANGE_PARSERS): 4492 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4493 if not expression: 4494 return this 4495 4496 this = expression 4497 elif self._match(TokenType.ISNULL): 4498 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4499 4500 # Postgres supports ISNULL and NOTNULL for conditions. 4501 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4502 if self._match(TokenType.NOTNULL): 4503 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4504 this = self.expression(exp.Not, this=this) 4505 4506 if negate: 4507 this = self._negate_range(this) 4508 4509 if self._match(TokenType.IS): 4510 this = self._parse_is(this) 4511 4512 return this 4513 4514 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4515 if not this: 4516 return this 4517 4518 return self.expression(exp.Not, this=this) 4519 4520 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4521 index = self._index - 1 4522 negate = self._match(TokenType.NOT) 4523 4524 if self._match_text_seq("DISTINCT", "FROM"): 4525 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4526 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4527 4528 if self._match(TokenType.JSON): 4529 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4530 4531 if self._match_text_seq("WITH"): 4532 _with = True 4533 elif self._match_text_seq("WITHOUT"): 4534 _with = False 4535 else: 4536 _with = None 4537 4538 unique = self._match(TokenType.UNIQUE) 4539 self._match_text_seq("KEYS") 4540 expression: t.Optional[exp.Expression] = self.expression( 4541 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4542 ) 4543 else: 4544 expression = self._parse_primary() or self._parse_null() 4545 if not expression: 4546 self._retreat(index) 4547 return None 4548 4549 this = self.expression(exp.Is, this=this, expression=expression) 4550 return self.expression(exp.Not, this=this) if negate else this 4551 4552 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4553 unnest = self._parse_unnest(with_alias=False) 4554 if unnest: 4555 this = self.expression(exp.In, this=this, unnest=unnest) 4556 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4557 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4558 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4559 4560 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4561 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4562 else: 4563 this = self.expression(exp.In, this=this, expressions=expressions) 4564 4565 if matched_l_paren: 4566 self._match_r_paren(this) 4567 elif not self._match(TokenType.R_BRACKET, expression=this): 4568 self.raise_error("Expecting ]") 4569 else: 4570 this = self.expression(exp.In, this=this, field=self._parse_column()) 4571 4572 return this 4573 4574 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4575 low = self._parse_bitwise() 4576 self._match(TokenType.AND) 4577 high = self._parse_bitwise() 4578 return self.expression(exp.Between, this=this, low=low, high=high) 4579 4580 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4581 if not self._match(TokenType.ESCAPE): 4582 return this 4583 return self.expression(exp.Escape, this=this, 
expression=self._parse_string()) 4584 4585 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4586 index = self._index 4587 4588 if not self._match(TokenType.INTERVAL) and match_interval: 4589 return None 4590 4591 if self._match(TokenType.STRING, advance=False): 4592 this = self._parse_primary() 4593 else: 4594 this = self._parse_term() 4595 4596 if not this or ( 4597 isinstance(this, exp.Column) 4598 and not this.table 4599 and not this.this.quoted 4600 and this.name.upper() == "IS" 4601 ): 4602 self._retreat(index) 4603 return None 4604 4605 unit = self._parse_function() or ( 4606 not self._match(TokenType.ALIAS, advance=False) 4607 and self._parse_var(any_token=True, upper=True) 4608 ) 4609 4610 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4611 # each INTERVAL expression into this canonical form so it's easy to transpile 4612 if this and this.is_number: 4613 this = exp.Literal.string(this.to_py()) 4614 elif this and this.is_string: 4615 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4616 if len(parts) == 1: 4617 if unit: 4618 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4619 self._retreat(self._index - 1) 4620 4621 this = exp.Literal.string(parts[0][0]) 4622 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4623 4624 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4625 unit = self.expression( 4626 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4627 ) 4628 4629 interval = self.expression(exp.Interval, this=this, unit=unit) 4630 4631 index = self._index 4632 self._match(TokenType.PLUS) 4633 4634 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4635 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4636 return self.expression( 4637 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4638 ) 4639 4640 self._retreat(index) 4641 return interval 4642 4643 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4644 this = self._parse_term() 4645 4646 while True: 4647 if self._match_set(self.BITWISE): 4648 this = self.expression( 4649 self.BITWISE[self._prev.token_type], 4650 this=this, 4651 expression=self._parse_term(), 4652 ) 4653 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4654 this = self.expression( 4655 exp.DPipe, 4656 this=this, 4657 expression=self._parse_term(), 4658 safe=not self.dialect.STRICT_STRING_CONCAT, 4659 ) 4660 elif self._match(TokenType.DQMARK): 4661 this = self.expression( 4662 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4663 ) 4664 elif self._match_pair(TokenType.LT, TokenType.LT): 4665 this = self.expression( 4666 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4667 ) 4668 elif self._match_pair(TokenType.GT, TokenType.GT): 4669 this = self.expression( 4670 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4671 ) 4672 else: 4673 break 4674 4675 return this 4676 4677 def _parse_term(self) -> t.Optional[exp.Expression]: 4678 this = self._parse_factor() 4679 4680 while self._match_set(self.TERM): 4681 klass = self.TERM[self._prev.token_type] 4682 comments = self._prev_comments 4683 expression = self._parse_factor() 4684 4685 this = self.expression(klass, this=this, comments=comments, expression=expression) 4686 4687 if isinstance(this, exp.Collate): 4688 expr = this.expression 4689 4690 # Preserve collations such as pg_catalog."default" 
(Postgres) as columns, otherwise
4691             # fallback to Identifier / Var
4692             if isinstance(expr, exp.Column) and len(expr.parts) == 1:
4693                 ident = expr.this
4694                 if isinstance(ident, exp.Identifier):
4695                     this.set("expression", ident if ident.quoted else exp.var(ident.name))
4696
4697         return this
4698
4699     def _parse_factor(self) -> t.Optional[exp.Expression]:
4700         parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
4701         this = parse_method()
4702
4703         while self._match_set(self.FACTOR):
4704             klass = self.FACTOR[self._prev.token_type]
4705             comments = self._prev_comments
4706             expression = parse_method()
4707
4708             if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
4709                 self._retreat(self._index - 1)
4710                 return this
4711
4712             this = self.expression(klass, this=this, comments=comments, expression=expression)
4713
4714             if isinstance(this, exp.Div):
4715                 this.args["typed"] = self.dialect.TYPED_DIVISION
4716                 this.args["safe"] = self.dialect.SAFE_DIVISION
4717
4718         return this
4719
4720     def _parse_exponent(self) -> t.Optional[exp.Expression]:
4721         return self._parse_tokens(self._parse_unary, self.EXPONENT)
4722
4723     def _parse_unary(self) -> t.Optional[exp.Expression]:
4724         if self._match_set(self.UNARY_PARSERS):
4725             return self.UNARY_PARSERS[self._prev.token_type](self)
4726         return self._parse_at_time_zone(self._parse_type())
4727
4728     def _parse_type(
4729         self, parse_interval: bool = True, fallback_to_identifier: bool = False
4730     ) -> t.Optional[exp.Expression]:
4731         interval = parse_interval and self._parse_interval()
4732         if interval:
4733             return interval
4734
4735         index = self._index
4736         data_type = self._parse_types(check_func=True, allow_identifiers=False)
4737
4738         # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4739         # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4740         if isinstance(data_type, exp.Cast):
4741             # This constructor can contain ops directly after it, for instance struct unnesting:
4742             # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
4743             return self._parse_column_ops(data_type)
4744
4745         if data_type:
4746             index2 = self._index
4747             this = self._parse_primary()
4748
4749             if isinstance(this, exp.Literal):
4750                 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
4751                 if parser:
4752                     return parser(self, this, data_type)
4753
4754                 return self.expression(exp.Cast, this=this, to=data_type)
4755
4756             # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
4757             # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
4758             #
4759             # If the index difference here is greater than 1, that means the parser itself must have
4760             # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4761             #
4762             # If it's not greater than 1, then it must be 1, because we've consumed at least the type
4763             # keyword, meaning that the expressions arg of the DataType must have gotten set by a
4764             # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
4765             # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4766             #
4767             # In these cases, we don't really want to return the converted type, but instead retreat
4768             # and try to parse a Column or Identifier in the section below.
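            # Worked example (illustrative, ours): for DECIMAL(38, 0) in the input, the
            # tokens DECIMAL ( 38 , 0 ) are consumed, so index2 - index == 6 and the
            # explicitly sized type is kept; for a bare DECIMAL expanded by a
            # TYPE_CONVERTERS callable, the difference is 1 and we retreat to parse a
            # column or identifier instead.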
4769 if data_type.expressions and index2 - index > 1: 4770 self._retreat(index2) 4771 return self._parse_column_ops(data_type) 4772 4773 self._retreat(index) 4774 4775 if fallback_to_identifier: 4776 return self._parse_id_var() 4777 4778 this = self._parse_column() 4779 return this and self._parse_column_ops(this) 4780 4781 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4782 this = self._parse_type() 4783 if not this: 4784 return None 4785 4786 if isinstance(this, exp.Column) and not this.table: 4787 this = exp.var(this.name.upper()) 4788 4789 return self.expression( 4790 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4791 ) 4792 4793 def _parse_types( 4794 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4795 ) -> t.Optional[exp.Expression]: 4796 index = self._index 4797 4798 this: t.Optional[exp.Expression] = None 4799 prefix = self._match_text_seq("SYSUDTLIB", ".") 4800 4801 if not self._match_set(self.TYPE_TOKENS): 4802 identifier = allow_identifiers and self._parse_id_var( 4803 any_token=False, tokens=(TokenType.VAR,) 4804 ) 4805 if isinstance(identifier, exp.Identifier): 4806 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4807 4808 if len(tokens) != 1: 4809 self.raise_error("Unexpected identifier", self._prev) 4810 4811 if tokens[0].token_type in self.TYPE_TOKENS: 4812 self._prev = tokens[0] 4813 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4814 type_name = identifier.name 4815 4816 while self._match(TokenType.DOT): 4817 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4818 4819 this = exp.DataType.build(type_name, udt=True) 4820 else: 4821 self._retreat(self._index - 1) 4822 return None 4823 else: 4824 return None 4825 4826 type_token = self._prev.token_type 4827 4828 if type_token == TokenType.PSEUDO_TYPE: 4829 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4830 4831 if type_token == TokenType.OBJECT_IDENTIFIER: 4832 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4833 4834 # https://materialize.com/docs/sql/types/map/ 4835 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4836 key_type = self._parse_types( 4837 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4838 ) 4839 if not self._match(TokenType.FARROW): 4840 self._retreat(index) 4841 return None 4842 4843 value_type = self._parse_types( 4844 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4845 ) 4846 if not self._match(TokenType.R_BRACKET): 4847 self._retreat(index) 4848 return None 4849 4850 return exp.DataType( 4851 this=exp.DataType.Type.MAP, 4852 expressions=[key_type, value_type], 4853 nested=True, 4854 prefix=prefix, 4855 ) 4856 4857 nested = type_token in self.NESTED_TYPE_TOKENS 4858 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4859 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4860 expressions = None 4861 maybe_func = False 4862 4863 if self._match(TokenType.L_PAREN): 4864 if is_struct: 4865 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4866 elif nested: 4867 expressions = self._parse_csv( 4868 lambda: self._parse_types( 4869 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4870 ) 4871 ) 4872 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4873 this = expressions[0] 4874 this.set("nullable", True) 4875 self._match_r_paren() 4876 return this 4877 elif type_token in self.ENUM_TYPE_TOKENS: 4878 
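                # e.g. ClickHouse's Enum8('a' = 1, 'b' = 2): values may carry explicit
                # codes, which is why equality expressions are parsed here (example ours)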
expressions = self._parse_csv(self._parse_equality) 4879 elif is_aggregate: 4880 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4881 any_token=False, tokens=(TokenType.VAR,) 4882 ) 4883 if not func_or_ident or not self._match(TokenType.COMMA): 4884 return None 4885 expressions = self._parse_csv( 4886 lambda: self._parse_types( 4887 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4888 ) 4889 ) 4890 expressions.insert(0, func_or_ident) 4891 else: 4892 expressions = self._parse_csv(self._parse_type_size) 4893 4894 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4895 if type_token == TokenType.VECTOR and len(expressions) == 2: 4896 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4897 4898 if not expressions or not self._match(TokenType.R_PAREN): 4899 self._retreat(index) 4900 return None 4901 4902 maybe_func = True 4903 4904 values: t.Optional[t.List[exp.Expression]] = None 4905 4906 if nested and self._match(TokenType.LT): 4907 if is_struct: 4908 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4909 else: 4910 expressions = self._parse_csv( 4911 lambda: self._parse_types( 4912 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4913 ) 4914 ) 4915 4916 if not self._match(TokenType.GT): 4917 self.raise_error("Expecting >") 4918 4919 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4920 values = self._parse_csv(self._parse_assignment) 4921 if not values and is_struct: 4922 values = None 4923 self._retreat(self._index - 1) 4924 else: 4925 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4926 4927 if type_token in self.TIMESTAMPS: 4928 if self._match_text_seq("WITH", "TIME", "ZONE"): 4929 maybe_func = False 4930 tz_type = ( 4931 exp.DataType.Type.TIMETZ 4932 if type_token in self.TIMES 4933 else exp.DataType.Type.TIMESTAMPTZ 4934 ) 4935 this = exp.DataType(this=tz_type, expressions=expressions) 4936 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4937 maybe_func = False 4938 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4939 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4940 maybe_func = False 4941 elif type_token == TokenType.INTERVAL: 4942 unit = self._parse_var(upper=True) 4943 if unit: 4944 if self._match_text_seq("TO"): 4945 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4946 4947 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4948 else: 4949 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4950 4951 if maybe_func and check_func: 4952 index2 = self._index 4953 peek = self._parse_string() 4954 4955 if not peek: 4956 self._retreat(index) 4957 return None 4958 4959 self._retreat(index2) 4960 4961 if not this: 4962 if self._match_text_seq("UNSIGNED"): 4963 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4964 if not unsigned_type_token: 4965 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4966 4967 type_token = unsigned_type_token or type_token 4968 4969 this = exp.DataType( 4970 this=exp.DataType.Type[type_token.value], 4971 expressions=expressions, 4972 nested=nested, 4973 prefix=prefix, 4974 ) 4975 4976 # Empty arrays/structs are allowed 4977 if values is not None: 4978 cls = exp.Struct if is_struct else exp.Array 4979 this = exp.cast(cls(expressions=values), this, copy=False) 4980 4981 elif expressions: 4982 this.set("expressions", 
expressions) 4983 4984 # https://materialize.com/docs/sql/types/list/#type-name 4985 while self._match(TokenType.LIST): 4986 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4987 4988 index = self._index 4989 4990 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4991 matched_array = self._match(TokenType.ARRAY) 4992 4993 while self._curr: 4994 datatype_token = self._prev.token_type 4995 matched_l_bracket = self._match(TokenType.L_BRACKET) 4996 if not matched_l_bracket and not matched_array: 4997 break 4998 4999 matched_array = False 5000 values = self._parse_csv(self._parse_assignment) or None 5001 if ( 5002 values 5003 and not schema 5004 and ( 5005 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5006 ) 5007 ): 5008 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5009 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5010 self._retreat(index) 5011 break 5012 5013 this = exp.DataType( 5014 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5015 ) 5016 self._match(TokenType.R_BRACKET) 5017 5018 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5019 converter = self.TYPE_CONVERTERS.get(this.this) 5020 if converter: 5021 this = converter(t.cast(exp.DataType, this)) 5022 5023 return this 5024 5025 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5026 index = self._index 5027 5028 if ( 5029 self._curr 5030 and self._next 5031 and self._curr.token_type in self.TYPE_TOKENS 5032 and self._next.token_type in self.TYPE_TOKENS 5033 ): 5034 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5035 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5036 this = self._parse_id_var() 5037 else: 5038 this = ( 5039 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5040 or self._parse_id_var() 5041 ) 5042 5043 self._match(TokenType.COLON) 5044 5045 if ( 5046 type_required 5047 and not isinstance(this, exp.DataType) 5048 and not self._match_set(self.TYPE_TOKENS, advance=False) 5049 ): 5050 self._retreat(index) 5051 return self._parse_types() 5052 5053 return self._parse_column_def(this) 5054 5055 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5056 if not self._match_text_seq("AT", "TIME", "ZONE"): 5057 return this 5058 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5059 5060 def _parse_column(self) -> t.Optional[exp.Expression]: 5061 this = self._parse_column_reference() 5062 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5063 5064 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5065 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5066 5067 return column 5068 5069 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5070 this = self._parse_field() 5071 if ( 5072 not this 5073 and self._match(TokenType.VALUES, advance=False) 5074 and self.VALUES_FOLLOWED_BY_PAREN 5075 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5076 ): 5077 this = self._parse_id_var() 5078 5079 if isinstance(this, exp.Identifier): 5080 # We bubble up comments from the Identifier to the Column 5081 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5082 5083 return this 5084 5085 def _parse_colon_as_variant_extract( 5086 self, this: t.Optional[exp.Expression] 5087 ) -> t.Optional[exp.Expression]: 5088 casts = [] 5089 json_path = [] 5090 escape = None 5091 5092 while self._match(TokenType.COLON): 5093 start_index = self._index 5094 5095 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5096 path = self._parse_column_ops( 5097 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5098 ) 5099 5100 # The cast :: operator has a lower precedence than the extraction operator :, so 5101 # we rearrange the AST appropriately to avoid casting the JSON path 5102 while isinstance(path, exp.Cast): 5103 casts.append(path.to) 5104 path = path.this 5105 5106 if casts: 5107 dcolon_offset = next( 5108 i 5109 for i, t in enumerate(self._tokens[start_index:]) 5110 if t.token_type == TokenType.DCOLON 5111 ) 5112 end_token = self._tokens[start_index + dcolon_offset - 1] 5113 else: 5114 end_token = self._prev 5115 5116 if path: 5117 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5118 # it'll roundtrip to a string literal in GET_PATH 5119 if isinstance(path, exp.Identifier) and path.quoted: 5120 escape = True 5121 5122 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5123 5124 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5125 # Databricks transforms it back to the colon/dot notation 5126 if json_path: 5127 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5128 5129 if json_path_expr: 5130 json_path_expr.set("escape", escape) 5131 5132 this = self.expression( 5133 exp.JSONExtract, 5134 this=this, 5135 expression=json_path_expr, 5136 variant_extract=True, 5137 ) 5138 5139 while casts: 5140 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5141 5142 return this 5143 5144 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5145 return self._parse_types() 5146 5147 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5148 this = self._parse_bracket(this) 5149 5150 while self._match_set(self.COLUMN_OPERATORS): 5151 op_token = self._prev.token_type 5152 op = self.COLUMN_OPERATORS.get(op_token) 5153 5154 if op_token == TokenType.DCOLON: 5155 field = self._parse_dcolon() 5156 if not field: 5157 self.raise_error("Expected type") 5158 elif op and self._curr: 5159 field = self._parse_column_reference() or self._parse_bracket() 5160 else: 5161 field = self._parse_field(any_token=True, anonymous_func=True) 5162 5163 if isinstance(field, (exp.Func, exp.Window)) and this: 5164 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5165 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5166 this = exp.replace_tree( 5167 this, 5168 lambda n: ( 5169 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5170 if n.table 5171 else n.this 5172 ) 5173 if isinstance(n, exp.Column) 5174 else n, 5175 ) 5176 5177 if op: 5178 this = op(self, this, field) 5179 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5180 this = self.expression( 5181 exp.Column, 5182 comments=this.comments, 5183 this=field, 5184 table=this.this, 5185 db=this.args.get("table"), 5186 catalog=this.args.get("db"), 5187 ) 5188 elif isinstance(field, exp.Window): 5189 # Move the exp.Dot's to the window's function 5190 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5191 field.set("this", window_func) 5192 this = field 5193 else: 5194 this = self.expression(exp.Dot, this=this, expression=field) 5195 5196 if field and field.comments: 5197 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5198 5199 this = self._parse_bracket(this) 5200 5201 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5202 5203 def _parse_primary(self) -> t.Optional[exp.Expression]: 5204 if self._match_set(self.PRIMARY_PARSERS): 5205 token_type = self._prev.token_type 5206 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5207 5208 if token_type == TokenType.STRING: 5209 expressions = [primary] 5210 while self._match(TokenType.STRING): 5211 expressions.append(exp.Literal.string(self._prev.text)) 5212 5213 if len(expressions) > 1: 5214 return self.expression(exp.Concat, expressions=expressions) 5215 5216 return primary 5217 5218 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5219 return exp.Literal.number(f"0.{self._prev.text}") 5220 5221 if 
self._match(TokenType.L_PAREN): 5222 comments = self._prev_comments 5223 query = self._parse_select() 5224 5225 if query: 5226 expressions = [query] 5227 else: 5228 expressions = self._parse_expressions() 5229 5230 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5231 5232 if not this and self._match(TokenType.R_PAREN, advance=False): 5233 this = self.expression(exp.Tuple) 5234 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5235 this = self._parse_subquery(this=this, parse_alias=False) 5236 elif isinstance(this, exp.Subquery): 5237 this = self._parse_subquery( 5238 this=self._parse_set_operations(this), parse_alias=False 5239 ) 5240 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5241 this = self.expression(exp.Tuple, expressions=expressions) 5242 else: 5243 this = self.expression(exp.Paren, this=this) 5244 5245 if this: 5246 this.add_comments(comments) 5247 5248 self._match_r_paren(expression=this) 5249 return this 5250 5251 return None 5252 5253 def _parse_field( 5254 self, 5255 any_token: bool = False, 5256 tokens: t.Optional[t.Collection[TokenType]] = None, 5257 anonymous_func: bool = False, 5258 ) -> t.Optional[exp.Expression]: 5259 if anonymous_func: 5260 field = ( 5261 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5262 or self._parse_primary() 5263 ) 5264 else: 5265 field = self._parse_primary() or self._parse_function( 5266 anonymous=anonymous_func, any_token=any_token 5267 ) 5268 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5269 5270 def _parse_function( 5271 self, 5272 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5273 anonymous: bool = False, 5274 optional_parens: bool = True, 5275 any_token: bool = False, 5276 ) -> t.Optional[exp.Expression]: 5277 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5278 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5279 fn_syntax = False 5280 if ( 5281 self._match(TokenType.L_BRACE, advance=False) 5282 and self._next 5283 and self._next.text.upper() == "FN" 5284 ): 5285 self._advance(2) 5286 fn_syntax = True 5287 5288 func = self._parse_function_call( 5289 functions=functions, 5290 anonymous=anonymous, 5291 optional_parens=optional_parens, 5292 any_token=any_token, 5293 ) 5294 5295 if fn_syntax: 5296 self._match(TokenType.R_BRACE) 5297 5298 return func 5299 5300 def _parse_function_call( 5301 self, 5302 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5303 anonymous: bool = False, 5304 optional_parens: bool = True, 5305 any_token: bool = False, 5306 ) -> t.Optional[exp.Expression]: 5307 if not self._curr: 5308 return None 5309 5310 comments = self._curr.comments 5311 token_type = self._curr.token_type 5312 this = self._curr.text 5313 upper = this.upper() 5314 5315 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5316 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5317 self._advance() 5318 return self._parse_window(parser(self)) 5319 5320 if not self._next or self._next.token_type != TokenType.L_PAREN: 5321 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5322 self._advance() 5323 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5324 5325 return None 5326 5327 if any_token: 5328 if token_type in self.RESERVED_TOKENS: 5329 return None 5330 elif token_type not in self.FUNC_TOKENS: 5331 return None 5332 5333 self._advance(2) 5334 5335 parser = self.FUNCTION_PARSERS.get(upper) 5336 if parser and not anonymous: 5337 this = parser(self) 
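# FUNCTION_PARSERS maps uppercase function names to bespoke callbacks for
# functions whose arguments are not a plain comma-separated list. A minimal
# sketch, assuming the default dialect's "EXTRACT" entry:
#
#   import sqlglot
#   ast = sqlglot.parse_one("SELECT EXTRACT(YEAR FROM ts)")
#
# Here EXTRACT is routed through its FUNCTION_PARSERS callback
# (_parse_extract, defined below) rather than the generic branch that follows.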
5338 else: 5339 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5340 5341 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5342 this = self.expression( 5343 subquery_predicate, comments=comments, this=self._parse_select() 5344 ) 5345 self._match_r_paren() 5346 return this 5347 5348 if functions is None: 5349 functions = self.FUNCTIONS 5350 5351 function = functions.get(upper) 5352 5353 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5354 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5355 5356 if alias: 5357 args = self._kv_to_prop_eq(args) 5358 5359 if function and not anonymous: 5360 if "dialect" in function.__code__.co_varnames: 5361 func = function(args, dialect=self.dialect) 5362 else: 5363 func = function(args) 5364 5365 func = self.validate_expression(func, args) 5366 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5367 func.meta["name"] = this 5368 5369 this = func 5370 else: 5371 if token_type == TokenType.IDENTIFIER: 5372 this = exp.Identifier(this=this, quoted=True) 5373 this = self.expression(exp.Anonymous, this=this, expressions=args) 5374 5375 if isinstance(this, exp.Expression): 5376 this.add_comments(comments) 5377 5378 self._match_r_paren(this) 5379 return self._parse_window(this) 5380 5381 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5382 return expression 5383 5384 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5385 transformed = [] 5386 5387 for index, e in enumerate(expressions): 5388 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5389 if isinstance(e, exp.Alias): 5390 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5391 5392 if not isinstance(e, exp.PropertyEQ): 5393 e = self.expression( 5394 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5395 ) 5396 5397 if isinstance(e.this, exp.Column): 5398 e.this.replace(e.this.this) 5399 else: 5400 e = self._to_prop_eq(e, index) 5401 5402 transformed.append(e) 5403 5404 return transformed 5405 5406 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5407 return self._parse_statement() 5408 5409 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5410 return self._parse_column_def(self._parse_id_var()) 5411 5412 def _parse_user_defined_function( 5413 self, kind: t.Optional[TokenType] = None 5414 ) -> t.Optional[exp.Expression]: 5415 this = self._parse_id_var() 5416 5417 while self._match(TokenType.DOT): 5418 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5419 5420 if not self._match(TokenType.L_PAREN): 5421 return this 5422 5423 expressions = self._parse_csv(self._parse_function_parameter) 5424 self._match_r_paren() 5425 return self.expression( 5426 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5427 ) 5428 5429 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5430 literal = self._parse_primary() 5431 if literal: 5432 return self.expression(exp.Introducer, this=token.text, expression=literal) 5433 5434 return self.expression(exp.Identifier, this=token.text) 5435 5436 def _parse_session_parameter(self) -> exp.SessionParameter: 5437 kind = None 5438 this = self._parse_id_var() or self._parse_primary() 5439 5440 if this and self._match(TokenType.DOT): 5441 kind = this.name 5442 this = self._parse_var() or self._parse_primary() 5443 5444 return self.expression(exp.SessionParameter, this=this, kind=kind) 
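# A minimal usage sketch for the session-parameter path above, assuming
# MySQL's "@@" prefix is tokenized into _parse_session_parameter:
#
#   import sqlglot
#   from sqlglot import exp
#   ast = sqlglot.parse_one("SELECT @@GLOBAL.sql_mode", read="mysql")
#   param = ast.find(exp.SessionParameter)  # kind="GLOBAL", this=sql_mode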
5445 5446 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5447 return self._parse_id_var() 5448 5449 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5450 index = self._index 5451 5452 if self._match(TokenType.L_PAREN): 5453 expressions = t.cast( 5454 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5455 ) 5456 5457 if not self._match(TokenType.R_PAREN): 5458 self._retreat(index) 5459 else: 5460 expressions = [self._parse_lambda_arg()] 5461 5462 if self._match_set(self.LAMBDAS): 5463 return self.LAMBDAS[self._prev.token_type](self, expressions) 5464 5465 self._retreat(index) 5466 5467 this: t.Optional[exp.Expression] 5468 5469 if self._match(TokenType.DISTINCT): 5470 this = self.expression( 5471 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5472 ) 5473 else: 5474 this = self._parse_select_or_expression(alias=alias) 5475 5476 return self._parse_limit( 5477 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5478 ) 5479 5480 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5481 index = self._index 5482 if not self._match(TokenType.L_PAREN): 5483 return this 5484 5485 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5486 # expr can be of both types 5487 if self._match_set(self.SELECT_START_TOKENS): 5488 self._retreat(index) 5489 return this 5490 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5491 self._match_r_paren() 5492 return self.expression(exp.Schema, this=this, expressions=args) 5493 5494 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5495 return self._parse_column_def(self._parse_field(any_token=True)) 5496 5497 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5498 # column defs are not really columns, they're identifiers 5499 if isinstance(this, exp.Column): 5500 this = this.this 5501 5502 kind = self._parse_types(schema=True) 5503 5504 if self._match_text_seq("FOR", "ORDINALITY"): 5505 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5506 5507 constraints: t.List[exp.Expression] = [] 5508 5509 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5510 ("ALIAS", "MATERIALIZED") 5511 ): 5512 persisted = self._prev.text.upper() == "MATERIALIZED" 5513 constraint_kind = exp.ComputedColumnConstraint( 5514 this=self._parse_assignment(), 5515 persisted=persisted or self._match_text_seq("PERSISTED"), 5516 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5517 ) 5518 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5519 elif ( 5520 kind 5521 and self._match(TokenType.ALIAS, advance=False) 5522 and ( 5523 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5524 or (self._next and self._next.token_type == TokenType.L_PAREN) 5525 ) 5526 ): 5527 self._advance() 5528 constraints.append( 5529 self.expression( 5530 exp.ColumnConstraint, 5531 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5532 ) 5533 ) 5534 5535 while True: 5536 constraint = self._parse_column_constraint() 5537 if not constraint: 5538 break 5539 constraints.append(constraint) 5540 5541 if not kind and not constraints: 5542 return this 5543 5544 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5545 5546 def _parse_auto_increment( 5547 self, 5548 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 
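# Shapes handled below: a wrapped pair such as "(100, 10)" or the textual form
# "START 100 INCREMENT 10" yields
# GeneratedAsIdentityColumnConstraint(start=100, increment=10); anything else
# falls through to a plain AutoIncrementColumnConstraint().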
5549 start = None 5550 increment = None 5551 5552 if self._match(TokenType.L_PAREN, advance=False): 5553 args = self._parse_wrapped_csv(self._parse_bitwise) 5554 start = seq_get(args, 0) 5555 increment = seq_get(args, 1) 5556 elif self._match_text_seq("START"): 5557 start = self._parse_bitwise() 5558 self._match_text_seq("INCREMENT") 5559 increment = self._parse_bitwise() 5560 5561 if start and increment: 5562 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5563 5564 return exp.AutoIncrementColumnConstraint() 5565 5566 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5567 if not self._match_text_seq("REFRESH"): 5568 self._retreat(self._index - 1) 5569 return None 5570 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5571 5572 def _parse_compress(self) -> exp.CompressColumnConstraint: 5573 if self._match(TokenType.L_PAREN, advance=False): 5574 return self.expression( 5575 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5576 ) 5577 5578 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5579 5580 def _parse_generated_as_identity( 5581 self, 5582 ) -> ( 5583 exp.GeneratedAsIdentityColumnConstraint 5584 | exp.ComputedColumnConstraint 5585 | exp.GeneratedAsRowColumnConstraint 5586 ): 5587 if self._match_text_seq("BY", "DEFAULT"): 5588 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5589 this = self.expression( 5590 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5591 ) 5592 else: 5593 self._match_text_seq("ALWAYS") 5594 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5595 5596 self._match(TokenType.ALIAS) 5597 5598 if self._match_text_seq("ROW"): 5599 start = self._match_text_seq("START") 5600 if not start: 5601 self._match(TokenType.END) 5602 hidden = self._match_text_seq("HIDDEN") 5603 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5604 5605 identity = self._match_text_seq("IDENTITY") 5606 5607 if self._match(TokenType.L_PAREN): 5608 if self._match(TokenType.START_WITH): 5609 this.set("start", self._parse_bitwise()) 5610 if self._match_text_seq("INCREMENT", "BY"): 5611 this.set("increment", self._parse_bitwise()) 5612 if self._match_text_seq("MINVALUE"): 5613 this.set("minvalue", self._parse_bitwise()) 5614 if self._match_text_seq("MAXVALUE"): 5615 this.set("maxvalue", self._parse_bitwise()) 5616 5617 if self._match_text_seq("CYCLE"): 5618 this.set("cycle", True) 5619 elif self._match_text_seq("NO", "CYCLE"): 5620 this.set("cycle", False) 5621 5622 if not identity: 5623 this.set("expression", self._parse_range()) 5624 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5625 args = self._parse_csv(self._parse_bitwise) 5626 this.set("start", seq_get(args, 0)) 5627 this.set("increment", seq_get(args, 1)) 5628 5629 self._match_r_paren() 5630 5631 return this 5632 5633 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5634 self._match_text_seq("LENGTH") 5635 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5636 5637 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5638 if self._match_text_seq("NULL"): 5639 return self.expression(exp.NotNullColumnConstraint) 5640 if self._match_text_seq("CASESPECIFIC"): 5641 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5642 if self._match_text_seq("FOR", "REPLICATION"): 5643 return 
self.expression(exp.NotForReplicationColumnConstraint) 5644 5645 # Unconsume the `NOT` token 5646 self._retreat(self._index - 1) 5647 return None 5648 5649 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5650 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5651 5652 procedure_option_follows = ( 5653 self._match(TokenType.WITH, advance=False) 5654 and self._next 5655 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5656 ) 5657 5658 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5659 return self.expression( 5660 exp.ColumnConstraint, 5661 this=this, 5662 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5663 ) 5664 5665 return this 5666 5667 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5668 if not self._match(TokenType.CONSTRAINT): 5669 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5670 5671 return self.expression( 5672 exp.Constraint, 5673 this=self._parse_id_var(), 5674 expressions=self._parse_unnamed_constraints(), 5675 ) 5676 5677 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5678 constraints = [] 5679 while True: 5680 constraint = self._parse_unnamed_constraint() or self._parse_function() 5681 if not constraint: 5682 break 5683 constraints.append(constraint) 5684 5685 return constraints 5686 5687 def _parse_unnamed_constraint( 5688 self, constraints: t.Optional[t.Collection[str]] = None 5689 ) -> t.Optional[exp.Expression]: 5690 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5691 constraints or self.CONSTRAINT_PARSERS 5692 ): 5693 return None 5694 5695 constraint = self._prev.text.upper() 5696 if constraint not in self.CONSTRAINT_PARSERS: 5697 self.raise_error(f"No parser found for schema constraint {constraint}.") 5698 5699 return self.CONSTRAINT_PARSERS[constraint](self) 5700 5701 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5702 return self._parse_id_var(any_token=False) 5703 5704 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5705 self._match_text_seq("KEY") 5706 return self.expression( 5707 exp.UniqueColumnConstraint, 5708 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5709 this=self._parse_schema(self._parse_unique_key()), 5710 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5711 on_conflict=self._parse_on_conflict(), 5712 ) 5713 5714 def _parse_key_constraint_options(self) -> t.List[str]: 5715 options = [] 5716 while True: 5717 if not self._curr: 5718 break 5719 5720 if self._match(TokenType.ON): 5721 action = None 5722 on = self._advance_any() and self._prev.text 5723 5724 if self._match_text_seq("NO", "ACTION"): 5725 action = "NO ACTION" 5726 elif self._match_text_seq("CASCADE"): 5727 action = "CASCADE" 5728 elif self._match_text_seq("RESTRICT"): 5729 action = "RESTRICT" 5730 elif self._match_pair(TokenType.SET, TokenType.NULL): 5731 action = "SET NULL" 5732 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5733 action = "SET DEFAULT" 5734 else: 5735 self.raise_error("Invalid key constraint") 5736 5737 options.append(f"ON {on} {action}") 5738 else: 5739 var = self._parse_var_from_options( 5740 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5741 ) 5742 if not var: 5743 break 5744 options.append(var.name) 5745 5746 return options 5747 5748 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5749 if match and not self._match(TokenType.REFERENCES): 5750 return None 5751 5752 expressions = None 
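# The referenced table (and optional column list) is parsed next with
# schema=True, so e.g. "REFERENCES t (a, b) ON DELETE CASCADE" produces an
# exp.Reference whose `this` is a Schema node and whose
# options=["ON DELETE CASCADE"].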
5753 this = self._parse_table(schema=True) 5754 options = self._parse_key_constraint_options() 5755 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5756 5757 def _parse_foreign_key(self) -> exp.ForeignKey: 5758 expressions = self._parse_wrapped_id_vars() 5759 reference = self._parse_references() 5760 options = {} 5761 5762 while self._match(TokenType.ON): 5763 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5764 self.raise_error("Expected DELETE or UPDATE") 5765 5766 kind = self._prev.text.lower() 5767 5768 if self._match_text_seq("NO", "ACTION"): 5769 action = "NO ACTION" 5770 elif self._match(TokenType.SET): 5771 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5772 action = "SET " + self._prev.text.upper() 5773 else: 5774 self._advance() 5775 action = self._prev.text.upper() 5776 5777 options[kind] = action 5778 5779 return self.expression( 5780 exp.ForeignKey, 5781 expressions=expressions, 5782 reference=reference, 5783 **options, # type: ignore 5784 ) 5785 5786 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5787 return self._parse_field() 5788 5789 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5790 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5791 self._retreat(self._index - 1) 5792 return None 5793 5794 id_vars = self._parse_wrapped_id_vars() 5795 return self.expression( 5796 exp.PeriodForSystemTimeConstraint, 5797 this=seq_get(id_vars, 0), 5798 expression=seq_get(id_vars, 1), 5799 ) 5800 5801 def _parse_primary_key( 5802 self, wrapped_optional: bool = False, in_props: bool = False 5803 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5804 desc = ( 5805 self._match_set((TokenType.ASC, TokenType.DESC)) 5806 and self._prev.token_type == TokenType.DESC 5807 ) 5808 5809 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5810 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5811 5812 expressions = self._parse_wrapped_csv( 5813 self._parse_primary_key_part, optional=wrapped_optional 5814 ) 5815 options = self._parse_key_constraint_options() 5816 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5817 5818 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5819 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5820 5821 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5822 """ 5823 Parses a datetime column in ODBC format. We parse the column into the corresponding 5824 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5825 same as we did for `DATE('yyyy-mm-dd')`. 
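Similarly, `{t'hh:mm:ss'}` maps to a `Time` and `{ts'yyyy-mm-dd hh:mm:ss'}` to a
`Timestamp` literal, following the ODBC_DATETIME_LITERALS mapping.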
5826 5827 Reference: 5828 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5829 """ 5830 self._match(TokenType.VAR) 5831 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5832 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5833 if not self._match(TokenType.R_BRACE): 5834 self.raise_error("Expected }") 5835 return expression 5836 5837 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5838 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5839 return this 5840 5841 bracket_kind = self._prev.token_type 5842 if ( 5843 bracket_kind == TokenType.L_BRACE 5844 and self._curr 5845 and self._curr.token_type == TokenType.VAR 5846 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5847 ): 5848 return self._parse_odbc_datetime_literal() 5849 5850 expressions = self._parse_csv( 5851 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5852 ) 5853 5854 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5855 self.raise_error("Expected ]") 5856 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5857 self.raise_error("Expected }") 5858 5859 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5860 if bracket_kind == TokenType.L_BRACE: 5861 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5862 elif not this: 5863 this = build_array_constructor( 5864 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5865 ) 5866 else: 5867 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5868 if constructor_type: 5869 return build_array_constructor( 5870 constructor_type, 5871 args=expressions, 5872 bracket_kind=bracket_kind, 5873 dialect=self.dialect, 5874 ) 5875 5876 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5877 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5878 5879 self._add_comments(this) 5880 return self._parse_bracket(this) 5881 5882 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5883 if self._match(TokenType.COLON): 5884 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5885 return this 5886 5887 def _parse_case(self) -> t.Optional[exp.Expression]: 5888 ifs = [] 5889 default = None 5890 5891 comments = self._prev_comments 5892 expression = self._parse_assignment() 5893 5894 while self._match(TokenType.WHEN): 5895 this = self._parse_assignment() 5896 self._match(TokenType.THEN) 5897 then = self._parse_assignment() 5898 ifs.append(self.expression(exp.If, this=this, true=then)) 5899 5900 if self._match(TokenType.ELSE): 5901 default = self._parse_assignment() 5902 5903 if not self._match(TokenType.END): 5904 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5905 default = exp.column("interval") 5906 else: 5907 self.raise_error("Expected END after CASE", self._prev) 5908 5909 return self.expression( 5910 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5911 ) 5912 5913 def _parse_if(self) -> t.Optional[exp.Expression]: 5914 if self._match(TokenType.L_PAREN): 5915 args = self._parse_csv(self._parse_assignment) 5916 this = self.validate_expression(exp.If.from_arg_list(args), args) 5917 self._match_r_paren() 5918 else: 5919 index = self._index - 1 5920 5921 if self.NO_PAREN_IF_COMMANDS and index == 0: 5922 
return self._parse_as_command(self._prev) 5923 5924 condition = self._parse_assignment() 5925 5926 if not condition: 5927 self._retreat(index) 5928 return None 5929 5930 self._match(TokenType.THEN) 5931 true = self._parse_assignment() 5932 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5933 self._match(TokenType.END) 5934 this = self.expression(exp.If, this=condition, true=true, false=false) 5935 5936 return this 5937 5938 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5939 if not self._match_text_seq("VALUE", "FOR"): 5940 self._retreat(self._index - 1) 5941 return None 5942 5943 return self.expression( 5944 exp.NextValueFor, 5945 this=self._parse_column(), 5946 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5947 ) 5948 5949 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5950 this = self._parse_function() or self._parse_var_or_string(upper=True) 5951 5952 if self._match(TokenType.FROM): 5953 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5954 5955 if not self._match(TokenType.COMMA): 5956 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5957 5958 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5959 5960 def _parse_gap_fill(self) -> exp.GapFill: 5961 self._match(TokenType.TABLE) 5962 this = self._parse_table() 5963 5964 self._match(TokenType.COMMA) 5965 args = [this, *self._parse_csv(self._parse_lambda)] 5966 5967 gap_fill = exp.GapFill.from_arg_list(args) 5968 return self.validate_expression(gap_fill, args) 5969 5970 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5971 this = self._parse_assignment() 5972 5973 if not self._match(TokenType.ALIAS): 5974 if self._match(TokenType.COMMA): 5975 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5976 5977 self.raise_error("Expected AS after CAST") 5978 5979 fmt = None 5980 to = self._parse_types() 5981 5982 if self._match(TokenType.FORMAT): 5983 fmt_string = self._parse_string() 5984 fmt = self._parse_at_time_zone(fmt_string) 5985 5986 if not to: 5987 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5988 if to.this in exp.DataType.TEMPORAL_TYPES: 5989 this = self.expression( 5990 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5991 this=this, 5992 format=exp.Literal.string( 5993 format_time( 5994 fmt_string.this if fmt_string else "", 5995 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5996 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5997 ) 5998 ), 5999 safe=safe, 6000 ) 6001 6002 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6003 this.set("zone", fmt.args["zone"]) 6004 return this 6005 elif not to: 6006 self.raise_error("Expected TYPE after CAST") 6007 elif isinstance(to, exp.Identifier): 6008 to = exp.DataType.build(to.name, udt=True) 6009 elif to.this == exp.DataType.Type.CHAR: 6010 if self._match(TokenType.CHARACTER_SET): 6011 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6012 6013 return self.expression( 6014 exp.Cast if strict else exp.TryCast, 6015 this=this, 6016 to=to, 6017 format=fmt, 6018 safe=safe, 6019 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6020 ) 6021 6022 def _parse_string_agg(self) -> exp.GroupConcat: 6023 if self._match(TokenType.DISTINCT): 6024 args: t.List[t.Optional[exp.Expression]] = [ 6025 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
6026 ] 6027 if self._match(TokenType.COMMA): 6028 args.extend(self._parse_csv(self._parse_assignment)) 6029 else: 6030 args = self._parse_csv(self._parse_assignment) # type: ignore 6031 6032 if self._match_text_seq("ON", "OVERFLOW"): 6033 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6034 if self._match_text_seq("ERROR"): 6035 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6036 else: 6037 self._match_text_seq("TRUNCATE") 6038 on_overflow = self.expression( 6039 exp.OverflowTruncateBehavior, 6040 this=self._parse_string(), 6041 with_count=( 6042 self._match_text_seq("WITH", "COUNT") 6043 or not self._match_text_seq("WITHOUT", "COUNT") 6044 ), 6045 ) 6046 else: 6047 on_overflow = None 6048 6049 index = self._index 6050 if not self._match(TokenType.R_PAREN) and args: 6051 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6052 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6053 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6054 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6055 6056 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6057 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6058 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6059 if not self._match_text_seq("WITHIN", "GROUP"): 6060 self._retreat(index) 6061 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6062 6063 # The corresponding match_r_paren will be called in parse_function (caller) 6064 self._match_l_paren() 6065 6066 return self.expression( 6067 exp.GroupConcat, 6068 this=self._parse_order(this=seq_get(args, 0)), 6069 separator=seq_get(args, 1), 6070 on_overflow=on_overflow, 6071 ) 6072 6073 def _parse_convert( 6074 self, strict: bool, safe: t.Optional[bool] = None 6075 ) -> t.Optional[exp.Expression]: 6076 this = self._parse_bitwise() 6077 6078 if self._match(TokenType.USING): 6079 to: t.Optional[exp.Expression] = self.expression( 6080 exp.CharacterSet, this=self._parse_var() 6081 ) 6082 elif self._match(TokenType.COMMA): 6083 to = self._parse_types() 6084 else: 6085 to = None 6086 6087 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6088 6089 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6090 """ 6091 There are generally two variants of the DECODE function: 6092 6093 - DECODE(bin, charset) 6094 - DECODE(expression, search, result [, search, result] ... [, default]) 6095 6096 The second variant will always be parsed into a CASE expression. Note that NULL 6097 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6098 instead of relying on pattern matching. 
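For example, `DECODE(x, 1, 'one', NULL, 'none', 'other')` is parsed as
`CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END`.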
6099 """ 6100 args = self._parse_csv(self._parse_assignment) 6101 6102 if len(args) < 3: 6103 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6104 6105 expression, *expressions = args 6106 if not expression: 6107 return None 6108 6109 ifs = [] 6110 for search, result in zip(expressions[::2], expressions[1::2]): 6111 if not search or not result: 6112 return None 6113 6114 if isinstance(search, exp.Literal): 6115 ifs.append( 6116 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6117 ) 6118 elif isinstance(search, exp.Null): 6119 ifs.append( 6120 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6121 ) 6122 else: 6123 cond = exp.or_( 6124 exp.EQ(this=expression.copy(), expression=search), 6125 exp.and_( 6126 exp.Is(this=expression.copy(), expression=exp.Null()), 6127 exp.Is(this=search.copy(), expression=exp.Null()), 6128 copy=False, 6129 ), 6130 copy=False, 6131 ) 6132 ifs.append(exp.If(this=cond, true=result)) 6133 6134 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6135 6136 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6137 self._match_text_seq("KEY") 6138 key = self._parse_column() 6139 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6140 self._match_text_seq("VALUE") 6141 value = self._parse_bitwise() 6142 6143 if not key and not value: 6144 return None 6145 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6146 6147 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6148 if not this or not self._match_text_seq("FORMAT", "JSON"): 6149 return this 6150 6151 return self.expression(exp.FormatJson, this=this) 6152 6153 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6154 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6155 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6156 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6157 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6158 else: 6159 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6160 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6161 6162 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6163 6164 if not empty and not error and not null: 6165 return None 6166 6167 return self.expression( 6168 exp.OnCondition, 6169 empty=empty, 6170 error=error, 6171 null=null, 6172 ) 6173 6174 def _parse_on_handling( 6175 self, on: str, *values: str 6176 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6177 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6178 for value in values: 6179 if self._match_text_seq(value, "ON", on): 6180 return f"{value} ON {on}" 6181 6182 index = self._index 6183 if self._match(TokenType.DEFAULT): 6184 default_value = self._parse_bitwise() 6185 if self._match_text_seq("ON", on): 6186 return default_value 6187 6188 self._retreat(index) 6189 6190 return None 6191 6192 @t.overload 6193 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6194 6195 @t.overload 6196 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6197 6198 def _parse_json_object(self, agg=False): 6199 star = self._parse_star() 6200 expressions = ( 6201 [star] 6202 if star 6203 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6204 ) 6205 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6206 6207 unique_keys = None 6208 if self._match_text_seq("WITH", "UNIQUE"): 6209 unique_keys = True 6210 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6211 unique_keys = False 6212 6213 self._match_text_seq("KEYS") 6214 6215 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6216 self._parse_type() 6217 ) 6218 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6219 6220 return self.expression( 6221 exp.JSONObjectAgg if agg else exp.JSONObject, 6222 expressions=expressions, 6223 null_handling=null_handling, 6224 unique_keys=unique_keys, 6225 return_type=return_type, 6226 encoding=encoding, 6227 ) 6228 6229 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6230 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6231 if not self._match_text_seq("NESTED"): 6232 this = self._parse_id_var() 6233 kind = self._parse_types(allow_identifiers=False) 6234 nested = None 6235 else: 6236 this = None 6237 kind = None 6238 nested = True 6239 6240 path = self._match_text_seq("PATH") and self._parse_string() 6241 nested_schema = nested and self._parse_json_schema() 6242 6243 return self.expression( 6244 exp.JSONColumnDef, 6245 this=this, 6246 kind=kind, 6247 path=path, 6248 nested_schema=nested_schema, 6249 ) 6250 6251 def _parse_json_schema(self) -> exp.JSONSchema: 6252 self._match_text_seq("COLUMNS") 6253 return self.expression( 6254 exp.JSONSchema, 6255 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6256 ) 6257 6258 def _parse_json_table(self) -> exp.JSONTable: 6259 this = self._parse_format_json(self._parse_bitwise()) 6260 path = self._match(TokenType.COMMA) and self._parse_string() 6261 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6262 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6263 schema = self._parse_json_schema() 6264 6265 return exp.JSONTable( 6266 this=this, 6267 schema=schema, 6268 path=path, 6269 error_handling=error_handling, 6270 empty_handling=empty_handling, 6271 ) 6272 6273 def _parse_match_against(self) -> exp.MatchAgainst: 6274 expressions = self._parse_csv(self._parse_column) 6275 6276 self._match_text_seq(")", "AGAINST", "(") 6277 6278 this = self._parse_string() 6279 6280 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6281 modifier = "IN NATURAL LANGUAGE MODE" 6282 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6283 modifier = f"{modifier} WITH QUERY EXPANSION" 6284 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6285 modifier = "IN BOOLEAN MODE" 6286 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6287 modifier = "WITH QUERY EXPANSION" 6288 else: 6289 modifier = None 6290 6291 return self.expression( 6292 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6293 ) 6294 6295 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6296 def _parse_open_json(self) -> exp.OpenJSON: 6297 this = self._parse_bitwise() 6298 path = self._match(TokenType.COMMA) and self._parse_string() 6299 6300 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6301 this = self._parse_field(any_token=True) 6302 kind = self._parse_types() 6303 path = 
self._parse_string() 6304 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6305 6306 return self.expression( 6307 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6308 ) 6309 6310 expressions = None 6311 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6312 self._match_l_paren() 6313 expressions = self._parse_csv(_parse_open_json_column_def) 6314 6315 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6316 6317 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6318 args = self._parse_csv(self._parse_bitwise) 6319 6320 if self._match(TokenType.IN): 6321 return self.expression( 6322 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6323 ) 6324 6325 if haystack_first: 6326 haystack = seq_get(args, 0) 6327 needle = seq_get(args, 1) 6328 else: 6329 needle = seq_get(args, 0) 6330 haystack = seq_get(args, 1) 6331 6332 return self.expression( 6333 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6334 ) 6335 6336 def _parse_predict(self) -> exp.Predict: 6337 self._match_text_seq("MODEL") 6338 this = self._parse_table() 6339 6340 self._match(TokenType.COMMA) 6341 self._match_text_seq("TABLE") 6342 6343 return self.expression( 6344 exp.Predict, 6345 this=this, 6346 expression=self._parse_table(), 6347 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6348 ) 6349 6350 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6351 args = self._parse_csv(self._parse_table) 6352 return exp.JoinHint(this=func_name.upper(), expressions=args) 6353 6354 def _parse_substring(self) -> exp.Substring: 6355 # Postgres supports the form: substring(string [from int] [for int]) 6356 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6357 6358 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6359 6360 if self._match(TokenType.FROM): 6361 args.append(self._parse_bitwise()) 6362 if self._match(TokenType.FOR): 6363 if len(args) == 1: 6364 args.append(exp.Literal.number(1)) 6365 args.append(self._parse_bitwise()) 6366 6367 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6368 6369 def _parse_trim(self) -> exp.Trim: 6370 # https://www.w3resource.com/sql/character-functions/trim.php 6371 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6372 6373 position = None 6374 collation = None 6375 expression = None 6376 6377 if self._match_texts(self.TRIM_TYPES): 6378 position = self._prev.text.upper() 6379 6380 this = self._parse_bitwise() 6381 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6382 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6383 expression = self._parse_bitwise() 6384 6385 if invert_order: 6386 this, expression = expression, this 6387 6388 if self._match(TokenType.COLLATE): 6389 collation = self._parse_bitwise() 6390 6391 return self.expression( 6392 exp.Trim, this=this, position=position, expression=expression, collation=collation 6393 ) 6394 6395 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6396 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6397 6398 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6399 return self._parse_window(self._parse_id_var(), alias=True) 6400 6401 def _parse_respect_or_ignore_nulls( 6402 self, this: t.Optional[exp.Expression] 6403 ) -> t.Optional[exp.Expression]: 6404 if self._match_text_seq("IGNORE", "NULLS"): 
6405 return self.expression(exp.IgnoreNulls, this=this)
6406 if self._match_text_seq("RESPECT", "NULLS"):
6407 return self.expression(exp.RespectNulls, this=this)
6408 return this
6409 
6410 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
6411 if self._match(TokenType.HAVING):
6412 self._match_texts(("MAX", "MIN"))
6413 max = self._prev.text.upper() != "MIN"
6414 return self.expression(
6415 exp.HavingMax, this=this, expression=self._parse_column(), max=max
6416 )
6417 
6418 return this
6419 
6420 def _parse_window(
6421 self, this: t.Optional[exp.Expression], alias: bool = False
6422 ) -> t.Optional[exp.Expression]:
6423 func = this
6424 comments = func.comments if isinstance(func, exp.Expression) else None
6425 
6426 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
6427 self._match(TokenType.WHERE)
6428 this = self.expression(
6429 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
6430 )
6431 self._match_r_paren()
6432 
6433 # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
6434 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
6435 if self._match_text_seq("WITHIN", "GROUP"):
6436 order = self._parse_wrapped(self._parse_order)
6437 this = self.expression(exp.WithinGroup, this=this, expression=order)
6438 
6439 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER clause;
6440 # some dialects choose to implement it and some do not.
6441 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
6442 
6443 # There is some code above in _parse_lambda that handles
6444 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...
6445 
6446 # The code below handles
6447 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
6448 
6449 # Oracle allows both formats
6450 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
6451 # and Snowflake chose to do the same for familiarity
6452 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
6453 if isinstance(this, exp.AggFunc):
6454 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)
6455 
6456 if ignore_respect and ignore_respect is not this:
6457 ignore_respect.replace(ignore_respect.this)
6458 this = self.expression(ignore_respect.__class__, this=this)
6459 
6460 this = self._parse_respect_or_ignore_nulls(this)
6461 
6462 # bigquery: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
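# e.g. in "SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)", the
# WINDOW clause is parsed by _parse_named_window (above), which re-enters this
# method with alias=True: the window name arrives as `this`, AS is consumed
# here, and the parenthesized specification below fills in the exp.Window node.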
6463 if alias: 6464 over = None 6465 self._match(TokenType.ALIAS) 6466 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6467 return this 6468 else: 6469 over = self._prev.text.upper() 6470 6471 if comments and isinstance(func, exp.Expression): 6472 func.pop_comments() 6473 6474 if not self._match(TokenType.L_PAREN): 6475 return self.expression( 6476 exp.Window, 6477 comments=comments, 6478 this=this, 6479 alias=self._parse_id_var(False), 6480 over=over, 6481 ) 6482 6483 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6484 6485 first = self._match(TokenType.FIRST) 6486 if self._match_text_seq("LAST"): 6487 first = False 6488 6489 partition, order = self._parse_partition_and_order() 6490 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6491 6492 if kind: 6493 self._match(TokenType.BETWEEN) 6494 start = self._parse_window_spec() 6495 self._match(TokenType.AND) 6496 end = self._parse_window_spec() 6497 6498 spec = self.expression( 6499 exp.WindowSpec, 6500 kind=kind, 6501 start=start["value"], 6502 start_side=start["side"], 6503 end=end["value"], 6504 end_side=end["side"], 6505 ) 6506 else: 6507 spec = None 6508 6509 self._match_r_paren() 6510 6511 window = self.expression( 6512 exp.Window, 6513 comments=comments, 6514 this=this, 6515 partition_by=partition, 6516 order=order, 6517 spec=spec, 6518 alias=window_alias, 6519 over=over, 6520 first=first, 6521 ) 6522 6523 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6524 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6525 return self._parse_window(window, alias=alias) 6526 6527 return window 6528 6529 def _parse_partition_and_order( 6530 self, 6531 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6532 return self._parse_partition_by(), self._parse_order() 6533 6534 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6535 self._match(TokenType.BETWEEN) 6536 6537 return { 6538 "value": ( 6539 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6540 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6541 or self._parse_bitwise() 6542 ), 6543 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6544 } 6545 6546 def _parse_alias( 6547 self, this: t.Optional[exp.Expression], explicit: bool = False 6548 ) -> t.Optional[exp.Expression]: 6549 any_token = self._match(TokenType.ALIAS) 6550 comments = self._prev_comments or [] 6551 6552 if explicit and not any_token: 6553 return this 6554 6555 if self._match(TokenType.L_PAREN): 6556 aliases = self.expression( 6557 exp.Aliases, 6558 comments=comments, 6559 this=this, 6560 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6561 ) 6562 self._match_r_paren(aliases) 6563 return aliases 6564 6565 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6566 self.STRING_ALIASES and self._parse_string_as_identifier() 6567 ) 6568 6569 if alias: 6570 comments.extend(alias.pop_comments()) 6571 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6572 column = this.this 6573 6574 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6575 if not this.comments and column and column.comments: 6576 this.comments = column.pop_comments() 6577 6578 return this 6579 6580 def _parse_id_var( 6581 self, 6582 any_token: bool = True, 6583 tokens: t.Optional[t.Collection[TokenType]] = None, 6584 ) -> t.Optional[exp.Expression]: 6585 expression = self._parse_identifier() 6586 if 
not expression and ( 6587 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6588 ): 6589 quoted = self._prev.token_type == TokenType.STRING 6590 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6591 6592 return expression 6593 6594 def _parse_string(self) -> t.Optional[exp.Expression]: 6595 if self._match_set(self.STRING_PARSERS): 6596 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6597 return self._parse_placeholder() 6598 6599 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6600 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6601 6602 def _parse_number(self) -> t.Optional[exp.Expression]: 6603 if self._match_set(self.NUMERIC_PARSERS): 6604 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6605 return self._parse_placeholder() 6606 6607 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6608 if self._match(TokenType.IDENTIFIER): 6609 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6610 return self._parse_placeholder() 6611 6612 def _parse_var( 6613 self, 6614 any_token: bool = False, 6615 tokens: t.Optional[t.Collection[TokenType]] = None, 6616 upper: bool = False, 6617 ) -> t.Optional[exp.Expression]: 6618 if ( 6619 (any_token and self._advance_any()) 6620 or self._match(TokenType.VAR) 6621 or (self._match_set(tokens) if tokens else False) 6622 ): 6623 return self.expression( 6624 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6625 ) 6626 return self._parse_placeholder() 6627 6628 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6629 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6630 self._advance() 6631 return self._prev 6632 return None 6633 6634 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6635 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6636 6637 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6638 return self._parse_primary() or self._parse_var(any_token=True) 6639 6640 def _parse_null(self) -> t.Optional[exp.Expression]: 6641 if self._match_set(self.NULL_TOKENS): 6642 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6643 return self._parse_placeholder() 6644 6645 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6646 if self._match(TokenType.TRUE): 6647 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6648 if self._match(TokenType.FALSE): 6649 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6650 return self._parse_placeholder() 6651 6652 def _parse_star(self) -> t.Optional[exp.Expression]: 6653 if self._match(TokenType.STAR): 6654 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6655 return self._parse_placeholder() 6656 6657 def _parse_parameter(self) -> exp.Parameter: 6658 this = self._parse_identifier() or self._parse_primary_or_var() 6659 return self.expression(exp.Parameter, this=this) 6660 6661 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6662 if self._match_set(self.PLACEHOLDER_PARSERS): 6663 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6664 if placeholder: 6665 return placeholder 6666 self._advance(-1) 6667 return None 6668 6669 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6670 if not self._match_texts(keywords): 6671 return None 6672 if self._match(TokenType.L_PAREN, 
advance=False): 6673 return self._parse_wrapped_csv(self._parse_expression) 6674 6675 expression = self._parse_expression() 6676 return [expression] if expression else None 6677 6678 def _parse_csv( 6679 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6680 ) -> t.List[exp.Expression]: 6681 parse_result = parse_method() 6682 items = [parse_result] if parse_result is not None else [] 6683 6684 while self._match(sep): 6685 self._add_comments(parse_result) 6686 parse_result = parse_method() 6687 if parse_result is not None: 6688 items.append(parse_result) 6689 6690 return items 6691 6692 def _parse_tokens( 6693 self, parse_method: t.Callable, expressions: t.Dict 6694 ) -> t.Optional[exp.Expression]: 6695 this = parse_method() 6696 6697 while self._match_set(expressions): 6698 this = self.expression( 6699 expressions[self._prev.token_type], 6700 this=this, 6701 comments=self._prev_comments, 6702 expression=parse_method(), 6703 ) 6704 6705 return this 6706 6707 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6708 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6709 6710 def _parse_wrapped_csv( 6711 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6712 ) -> t.List[exp.Expression]: 6713 return self._parse_wrapped( 6714 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6715 ) 6716 6717 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6718 wrapped = self._match(TokenType.L_PAREN) 6719 if not wrapped and not optional: 6720 self.raise_error("Expecting (") 6721 parse_result = parse_method() 6722 if wrapped: 6723 self._match_r_paren() 6724 return parse_result 6725 6726 def _parse_expressions(self) -> t.List[exp.Expression]: 6727 return self._parse_csv(self._parse_expression) 6728 6729 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6730 return self._parse_select() or self._parse_set_operations( 6731 self._parse_expression() if alias else self._parse_assignment() 6732 ) 6733 6734 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6735 return self._parse_query_modifiers( 6736 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6737 ) 6738 6739 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6740 this = None 6741 if self._match_texts(self.TRANSACTION_KIND): 6742 this = self._prev.text 6743 6744 self._match_texts(("TRANSACTION", "WORK")) 6745 6746 modes = [] 6747 while True: 6748 mode = [] 6749 while self._match(TokenType.VAR): 6750 mode.append(self._prev.text) 6751 6752 if mode: 6753 modes.append(" ".join(mode)) 6754 if not self._match(TokenType.COMMA): 6755 break 6756 6757 return self.expression(exp.Transaction, this=this, modes=modes) 6758 6759 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6760 chain = None 6761 savepoint = None 6762 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6763 6764 self._match_texts(("TRANSACTION", "WORK")) 6765 6766 if self._match_text_seq("TO"): 6767 self._match_text_seq("SAVEPOINT") 6768 savepoint = self._parse_id_var() 6769 6770 if self._match(TokenType.AND): 6771 chain = not self._match_text_seq("NO") 6772 self._match_text_seq("CHAIN") 6773 6774 if is_rollback: 6775 return self.expression(exp.Rollback, savepoint=savepoint) 6776 6777 return self.expression(exp.Commit, chain=chain) 6778 6779 def _parse_refresh(self) -> exp.Refresh: 6780 self._match(TokenType.TABLE) 6781 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6782 6783 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6784 if not self._match_text_seq("ADD"): 6785 return None 6786 6787 self._match(TokenType.COLUMN) 6788 exists_column = self._parse_exists(not_=True) 6789 expression = self._parse_field_def() 6790 6791 if expression: 6792 expression.set("exists", exists_column) 6793 6794 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6795 if self._match_texts(("FIRST", "AFTER")): 6796 position = self._prev.text 6797 column_position = self.expression( 6798 exp.ColumnPosition, this=self._parse_column(), position=position 6799 ) 6800 expression.set("position", column_position) 6801 6802 return expression 6803 6804 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6805 drop = self._match(TokenType.DROP) and self._parse_drop() 6806 if drop and not isinstance(drop, exp.Command): 6807 drop.set("kind", drop.args.get("kind", "COLUMN")) 6808 return drop 6809 6810 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6811 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6812 return self.expression( 6813 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6814 ) 6815 6816 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6817 index = self._index - 1 6818 6819 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6820 return self._parse_csv( 6821 lambda: self.expression( 6822 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6823 ) 6824 ) 6825 6826 self._retreat(index) 6827 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6828 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6829 6830 if self._match_text_seq("ADD", "COLUMNS"): 6831 schema = self._parse_schema() 6832 if schema: 6833 return [schema] 6834 return [] 6835 6836 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6837 6838 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6839 if self._match_texts(self.ALTER_ALTER_PARSERS): 6840 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6841 6842 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6843 # keyword after ALTER we default to parsing this statement 6844 self._match(TokenType.COLUMN) 6845 column = self._parse_field(any_token=True) 6846 6847 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6848 return self.expression(exp.AlterColumn, this=column, drop=True) 6849 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6850 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6851 if self._match(TokenType.COMMENT): 6852 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6853 if self._match_text_seq("DROP", "NOT", "NULL"): 6854 return self.expression( 6855 exp.AlterColumn, 6856 this=column, 6857 drop=True, 6858 allow_null=True, 6859 ) 6860 if self._match_text_seq("SET", "NOT", "NULL"): 6861 return self.expression( 6862 exp.AlterColumn, 6863 this=column, 6864 allow_null=False, 6865 ) 6866 self._match_text_seq("SET", "DATA") 6867 self._match_text_seq("TYPE") 6868 return self.expression( 6869 exp.AlterColumn, 6870 this=column, 6871 dtype=self._parse_types(), 6872 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6873 using=self._match(TokenType.USING) and 
    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set
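    # Illustrative sketch (not part of the sqlglot source): each branch of
    # _parse_alter_table_set maps one keyword to a dedicated arg on exp.AlterSet,
    # e.g. the TABLESPACE arm (assuming the default dialect accepts this form):
    #
    #     >>> import sqlglot
    #     >>> alter = sqlglot.parse_one("ALTER TABLE t SET TABLESPACE fast_disk")
    #     >>> alter.args["actions"][0].args["tablespace"].sql()
    #     'fast_disk'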
    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
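    # Illustrative sketch (not part of the sqlglot source): _parse_merge and
    # _parse_when_matched build one exp.When node per WHEN branch:
    #
    #     >>> import sqlglot
    #     >>> merge = sqlglot.parse_one(
    #     ...     "MERGE INTO t USING s ON t.id = s.id "
    #     ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #     ...     "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #     ... )
    #     >>> len(merge.expressions)  # one exp.When per branch
    #     2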
self._match_text_seq("TRANSACTION"): 7078 return self._parse_set_transaction(global_=kind == "GLOBAL") 7079 7080 left = self._parse_primary() or self._parse_column() 7081 assignment_delimiter = self._match_texts(("=", "TO")) 7082 7083 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7084 self._retreat(index) 7085 return None 7086 7087 right = self._parse_statement() or self._parse_id_var() 7088 if isinstance(right, (exp.Column, exp.Identifier)): 7089 right = exp.var(right.name) 7090 7091 this = self.expression(exp.EQ, this=left, expression=right) 7092 return self.expression(exp.SetItem, this=this, kind=kind) 7093 7094 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7095 self._match_text_seq("TRANSACTION") 7096 characteristics = self._parse_csv( 7097 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7098 ) 7099 return self.expression( 7100 exp.SetItem, 7101 expressions=characteristics, 7102 kind="TRANSACTION", 7103 **{"global": global_}, # type: ignore 7104 ) 7105 7106 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7107 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7108 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7109 7110 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7111 index = self._index 7112 set_ = self.expression( 7113 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7114 ) 7115 7116 if self._curr: 7117 self._retreat(index) 7118 return self._parse_as_command(self._prev) 7119 7120 return set_ 7121 7122 def _parse_var_from_options( 7123 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7124 ) -> t.Optional[exp.Var]: 7125 start = self._curr 7126 if not start: 7127 return None 7128 7129 option = start.text.upper() 7130 continuations = options.get(option) 7131 7132 index = self._index 7133 self._advance() 7134 for keywords in continuations or []: 7135 if isinstance(keywords, str): 7136 keywords = (keywords,) 7137 7138 if self._match_text_seq(*keywords): 7139 option = f"{option} {' '.join(keywords)}" 7140 break 7141 else: 7142 if continuations or continuations is None: 7143 if raise_unmatched: 7144 self.raise_error(f"Unknown option {option}") 7145 7146 self._retreat(index) 7147 return None 7148 7149 return exp.var(option) 7150 7151 def _parse_as_command(self, start: Token) -> exp.Command: 7152 while self._curr: 7153 self._advance() 7154 text = self._find_sql(start, self._prev) 7155 size = len(start.text) 7156 self._warn_unsupported() 7157 return exp.Command(this=text[:size], expression=text[size:]) 7158 7159 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7160 settings = [] 7161 7162 self._match_l_paren() 7163 kind = self._parse_id_var() 7164 7165 if self._match(TokenType.L_PAREN): 7166 while True: 7167 key = self._parse_id_var() 7168 value = self._parse_primary() 7169 if not key and value is None: 7170 break 7171 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7172 self._match(TokenType.R_PAREN) 7173 7174 self._match_r_paren() 7175 7176 return self.expression( 7177 exp.DictProperty, 7178 this=this, 7179 kind=kind.this if kind else None, 7180 settings=settings, 7181 ) 7182 7183 def _parse_dict_range(self, this: str) -> exp.DictRange: 7184 self._match_l_paren() 7185 has_min = self._match_text_seq("MIN") 7186 if has_min: 7187 min = self._parse_var() or self._parse_primary() 7188 self._match_text_seq("MAX") 7189 max = 
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")
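    # Illustrative sketch (not part of the sqlglot source): the _match*
    # primitives above advance the token cursor on success and return None on
    # failure, which is what makes speculative parsing (save index, try,
    # _retreat) cheap. The cursor they drive is fed by Parser.parse, assuming
    # the default dialect:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.parser import Parser
    #     >>> ast = Parser().parse(sqlglot.tokenize("SELECT 1"))[0]
    #     >>> ast.sql()
    #     'SELECT 1'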
    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
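    # Illustrative sketch (not part of the sqlglot source): _parse_truncate_table
    # captures the identity and cascade options directly as args:
    #
    #     >>> import sqlglot
    #     >>> trunc = sqlglot.parse_one(
    #     ...     "TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE", read="postgres"
    #     ... )
    #     >>> trunc.args["identity"], trunc.args["option"]
    #     ('RESTART', 'CASCADE')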
self._match_text_seq("FORMAT_NAME", "="): 7448 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7449 # so we parse it separately to use _parse_field() 7450 prop = self.expression( 7451 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7452 ) 7453 opts.append(prop) 7454 else: 7455 opts.append(self._parse_property()) 7456 7457 self._match(TokenType.COMMA) 7458 7459 return opts 7460 7461 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7462 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7463 7464 options = [] 7465 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7466 option = self._parse_var(any_token=True) 7467 prev = self._prev.text.upper() 7468 7469 # Different dialects might separate options and values by white space, "=" and "AS" 7470 self._match(TokenType.EQ) 7471 self._match(TokenType.ALIAS) 7472 7473 param = self.expression(exp.CopyParameter, this=option) 7474 7475 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7476 TokenType.L_PAREN, advance=False 7477 ): 7478 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7479 param.set("expressions", self._parse_wrapped_options()) 7480 elif prev == "FILE_FORMAT": 7481 # T-SQL's external file format case 7482 param.set("expression", self._parse_field()) 7483 else: 7484 param.set("expression", self._parse_unquoted_field()) 7485 7486 options.append(param) 7487 self._match(sep) 7488 7489 return options 7490 7491 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7492 expr = self.expression(exp.Credentials) 7493 7494 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7495 expr.set("storage", self._parse_field()) 7496 if self._match_text_seq("CREDENTIALS"): 7497 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7498 creds = ( 7499 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7500 ) 7501 expr.set("credentials", creds) 7502 if self._match_text_seq("ENCRYPTION"): 7503 expr.set("encryption", self._parse_wrapped_options()) 7504 if self._match_text_seq("IAM_ROLE"): 7505 expr.set("iam_role", self._parse_field()) 7506 if self._match_text_seq("REGION"): 7507 expr.set("region", self._parse_field()) 7508 7509 return expr 7510 7511 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7512 return self._parse_field() 7513 7514 def _parse_copy(self) -> exp.Copy | exp.Command: 7515 start = self._prev 7516 7517 self._match(TokenType.INTO) 7518 7519 this = ( 7520 self._parse_select(nested=True, parse_subquery_alias=False) 7521 if self._match(TokenType.L_PAREN, advance=False) 7522 else self._parse_table(schema=True) 7523 ) 7524 7525 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7526 7527 files = self._parse_csv(self._parse_file_location) 7528 credentials = self._parse_credentials() 7529 7530 self._match_text_seq("WITH") 7531 7532 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7533 7534 # Fallback case 7535 if self._curr: 7536 return self._parse_as_command(start) 7537 7538 return self.expression( 7539 exp.Copy, 7540 this=this, 7541 kind=kind, 7542 credentials=credentials, 7543 files=files, 7544 params=params, 7545 ) 7546 7547 def _parse_normalize(self) -> exp.Normalize: 7548 return self.expression( 7549 exp.Normalize, 7550 this=self._parse_bitwise(), 7551 form=self._match(TokenType.COMMA) and self._parse_var(), 7552 ) 7553 7554 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7555 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7556 this = self._parse_function() 7557 if isinstance(this, exp.Columns): 7558 this.set("unpack", True) 7559 return this 7560 7561 return self.expression( 7562 exp.Star, 7563 **{ # type: ignore 7564 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7565 "replace": self._parse_star_op("REPLACE"), 7566 "rename": self._parse_star_op("RENAME"), 7567 }, 7568 ) 7569 7570 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7571 privilege_parts = [] 7572 7573 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7574 # (end of privilege list) or L_PAREN (start of column list) are met 7575 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7576 privilege_parts.append(self._curr.text.upper()) 7577 self._advance() 7578 7579 this = exp.var(" ".join(privilege_parts)) 7580 expressions = ( 7581 self._parse_wrapped_csv(self._parse_column) 7582 if self._match(TokenType.L_PAREN, advance=False) 7583 else None 7584 ) 7585 7586 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7587 7588 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7589 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7590 principal = self._parse_id_var() 7591 7592 if not principal: 7593 return None 7594 7595 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7596 7597 def _parse_grant(self) -> exp.Grant | exp.Command: 7598 start = self._prev 7599 7600 privileges = self._parse_csv(self._parse_grant_privilege) 7601 7602 self._match(TokenType.ON) 7603 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7604 7605 # Attempt to parse the securable e.g. MySQL allows names 7606 # such as "foo.*", "*.*" which are not easily parseable yet 7607 securable = self._try_parse(self._parse_table_parts) 7608 7609 if not securable or not self._match_text_seq("TO"): 7610 return self._parse_as_command(start) 7611 7612 principals = self._parse_csv(self._parse_grant_principal) 7613 7614 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7615 7616 if self._curr: 7617 return self._parse_as_command(start) 7618 7619 return self.expression( 7620 exp.Grant, 7621 privileges=privileges, 7622 kind=kind, 7623 securable=securable, 7624 principals=principals, 7625 grant_option=grant_option, 7626 ) 7627 7628 def _parse_overlay(self) -> exp.Overlay: 7629 return self.expression( 7630 exp.Overlay, 7631 **{ # type: ignore 7632 "this": self._parse_bitwise(), 7633 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7634 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7635 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7636 }, 7637 )
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.DECIMAL256, 356 TokenType.UDECIMAL, 357 TokenType.BIGDECIMAL, 358 TokenType.UUID, 359 TokenType.GEOGRAPHY, 360 TokenType.GEOMETRY, 361 TokenType.POINT, 362 TokenType.RING, 363 TokenType.LINESTRING, 364 TokenType.MULTILINESTRING, 365 TokenType.POLYGON, 366 TokenType.MULTIPOLYGON, 367 TokenType.HLLSKETCH, 368 TokenType.HSTORE, 369 TokenType.PSEUDO_TYPE, 370 TokenType.SUPER, 371 TokenType.SERIAL, 372 TokenType.SMALLSERIAL, 373 TokenType.BIGSERIAL, 374 TokenType.XML, 375 TokenType.YEAR, 376 TokenType.UNIQUEIDENTIFIER, 377 TokenType.USERDEFINED, 378 TokenType.MONEY, 379 TokenType.SMALLMONEY, 380 TokenType.ROWVERSION, 381 TokenType.IMAGE, 382 TokenType.VARIANT, 383 TokenType.VECTOR, 384 TokenType.OBJECT, 385 TokenType.OBJECT_IDENTIFIER, 386 TokenType.INET, 387 TokenType.IPADDRESS, 388 TokenType.IPPREFIX, 389 TokenType.IPV4, 390 TokenType.IPV6, 391 
TokenType.UNKNOWN, 392 TokenType.NULL, 393 TokenType.NAME, 394 TokenType.TDIGEST, 395 *ENUM_TYPE_TOKENS, 396 *NESTED_TYPE_TOKENS, 397 *AGGREGATE_TYPE_TOKENS, 398 } 399 400 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 401 TokenType.BIGINT: TokenType.UBIGINT, 402 TokenType.INT: TokenType.UINT, 403 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 404 TokenType.SMALLINT: TokenType.USMALLINT, 405 TokenType.TINYINT: TokenType.UTINYINT, 406 TokenType.DECIMAL: TokenType.UDECIMAL, 407 } 408 409 SUBQUERY_PREDICATES = { 410 TokenType.ANY: exp.Any, 411 TokenType.ALL: exp.All, 412 TokenType.EXISTS: exp.Exists, 413 TokenType.SOME: exp.Any, 414 } 415 416 RESERVED_TOKENS = { 417 *Tokenizer.SINGLE_TOKENS.values(), 418 TokenType.SELECT, 419 } - {TokenType.IDENTIFIER} 420 421 DB_CREATABLES = { 422 TokenType.DATABASE, 423 TokenType.DICTIONARY, 424 TokenType.MODEL, 425 TokenType.SCHEMA, 426 TokenType.SEQUENCE, 427 TokenType.STORAGE_INTEGRATION, 428 TokenType.TABLE, 429 TokenType.TAG, 430 TokenType.VIEW, 431 TokenType.WAREHOUSE, 432 TokenType.STREAMLIT, 433 TokenType.SINK, 434 TokenType.SOURCE, 435 } 436 437 CREATABLES = { 438 TokenType.COLUMN, 439 TokenType.CONSTRAINT, 440 TokenType.FOREIGN_KEY, 441 TokenType.FUNCTION, 442 TokenType.INDEX, 443 TokenType.PROCEDURE, 444 *DB_CREATABLES, 445 } 446 447 ALTERABLES = { 448 TokenType.INDEX, 449 TokenType.TABLE, 450 TokenType.VIEW, 451 } 452 453 # Tokens that can represent identifiers 454 ID_VAR_TOKENS = { 455 TokenType.ALL, 456 TokenType.ATTACH, 457 TokenType.VAR, 458 TokenType.ANTI, 459 TokenType.APPLY, 460 TokenType.ASC, 461 TokenType.ASOF, 462 TokenType.AUTO_INCREMENT, 463 TokenType.BEGIN, 464 TokenType.BPCHAR, 465 TokenType.CACHE, 466 TokenType.CASE, 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.COMMENT, 470 TokenType.COMMIT, 471 TokenType.CONSTRAINT, 472 TokenType.COPY, 473 TokenType.CUBE, 474 TokenType.DEFAULT, 475 TokenType.DELETE, 476 TokenType.DESC, 477 TokenType.DESCRIBE, 478 TokenType.DETACH, 479 TokenType.DICTIONARY, 480 TokenType.DIV, 481 TokenType.END, 482 TokenType.EXECUTE, 483 TokenType.ESCAPE, 484 TokenType.FALSE, 485 TokenType.FIRST, 486 TokenType.FILTER, 487 TokenType.FINAL, 488 TokenType.FORMAT, 489 TokenType.FULL, 490 TokenType.IDENTIFIER, 491 TokenType.IS, 492 TokenType.ISNULL, 493 TokenType.INTERVAL, 494 TokenType.KEEP, 495 TokenType.KILL, 496 TokenType.LEFT, 497 TokenType.LOAD, 498 TokenType.MERGE, 499 TokenType.NATURAL, 500 TokenType.NEXT, 501 TokenType.OFFSET, 502 TokenType.OPERATOR, 503 TokenType.ORDINALITY, 504 TokenType.OVERLAPS, 505 TokenType.OVERWRITE, 506 TokenType.PARTITION, 507 TokenType.PERCENT, 508 TokenType.PIVOT, 509 TokenType.PRAGMA, 510 TokenType.RANGE, 511 TokenType.RECURSIVE, 512 TokenType.REFERENCES, 513 TokenType.REFRESH, 514 TokenType.RENAME, 515 TokenType.REPLACE, 516 TokenType.RIGHT, 517 TokenType.ROLLUP, 518 TokenType.ROW, 519 TokenType.ROWS, 520 TokenType.SEMI, 521 TokenType.SET, 522 TokenType.SETTINGS, 523 TokenType.SHOW, 524 TokenType.TEMPORARY, 525 TokenType.TOP, 526 TokenType.TRUE, 527 TokenType.TRUNCATE, 528 TokenType.UNIQUE, 529 TokenType.UNNEST, 530 TokenType.UNPIVOT, 531 TokenType.UPDATE, 532 TokenType.USE, 533 TokenType.VOLATILE, 534 TokenType.WINDOW, 535 *CREATABLES, 536 *SUBQUERY_PREDICATES, 537 *TYPE_TOKENS, 538 *NO_PAREN_FUNCTIONS, 539 } 540 ID_VAR_TOKENS.remove(TokenType.UNION) 541 542 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 543 544 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 545 TokenType.ANTI, 546 TokenType.APPLY, 547 TokenType.ASOF, 548 TokenType.FULL, 549 TokenType.LEFT, 550 TokenType.LOCK, 551 
TokenType.NATURAL, 552 TokenType.OFFSET, 553 TokenType.RIGHT, 554 TokenType.SEMI, 555 TokenType.WINDOW, 556 } 557 558 ALIAS_TOKENS = ID_VAR_TOKENS 559 560 ARRAY_CONSTRUCTORS = { 561 "ARRAY": exp.Array, 562 "LIST": exp.List, 563 } 564 565 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 566 567 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 568 569 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 570 571 FUNC_TOKENS = { 572 TokenType.COLLATE, 573 TokenType.COMMAND, 574 TokenType.CURRENT_DATE, 575 TokenType.CURRENT_DATETIME, 576 TokenType.CURRENT_TIMESTAMP, 577 TokenType.CURRENT_TIME, 578 TokenType.CURRENT_USER, 579 TokenType.FILTER, 580 TokenType.FIRST, 581 TokenType.FORMAT, 582 TokenType.GLOB, 583 TokenType.IDENTIFIER, 584 TokenType.INDEX, 585 TokenType.ISNULL, 586 TokenType.ILIKE, 587 TokenType.INSERT, 588 TokenType.LIKE, 589 TokenType.MERGE, 590 TokenType.NEXT, 591 TokenType.OFFSET, 592 TokenType.PRIMARY_KEY, 593 TokenType.RANGE, 594 TokenType.REPLACE, 595 TokenType.RLIKE, 596 TokenType.ROW, 597 TokenType.UNNEST, 598 TokenType.VAR, 599 TokenType.LEFT, 600 TokenType.RIGHT, 601 TokenType.SEQUENCE, 602 TokenType.DATE, 603 TokenType.DATETIME, 604 TokenType.TABLE, 605 TokenType.TIMESTAMP, 606 TokenType.TIMESTAMPTZ, 607 TokenType.TRUNCATE, 608 TokenType.WINDOW, 609 TokenType.XOR, 610 *TYPE_TOKENS, 611 *SUBQUERY_PREDICATES, 612 } 613 614 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 615 TokenType.AND: exp.And, 616 } 617 618 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 619 TokenType.COLON_EQ: exp.PropertyEQ, 620 } 621 622 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 623 TokenType.OR: exp.Or, 624 } 625 626 EQUALITY = { 627 TokenType.EQ: exp.EQ, 628 TokenType.NEQ: exp.NEQ, 629 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 630 } 631 632 COMPARISON = { 633 TokenType.GT: exp.GT, 634 TokenType.GTE: exp.GTE, 635 TokenType.LT: exp.LT, 636 TokenType.LTE: exp.LTE, 637 } 638 639 BITWISE = { 640 TokenType.AMP: exp.BitwiseAnd, 641 TokenType.CARET: exp.BitwiseXor, 642 TokenType.PIPE: exp.BitwiseOr, 643 } 644 645 TERM = { 646 TokenType.DASH: exp.Sub, 647 TokenType.PLUS: exp.Add, 648 TokenType.MOD: exp.Mod, 649 TokenType.COLLATE: exp.Collate, 650 } 651 652 FACTOR = { 653 TokenType.DIV: exp.IntDiv, 654 TokenType.LR_ARROW: exp.Distance, 655 TokenType.SLASH: exp.Div, 656 TokenType.STAR: exp.Mul, 657 } 658 659 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 660 661 TIMES = { 662 TokenType.TIME, 663 TokenType.TIMETZ, 664 } 665 666 TIMESTAMPS = { 667 TokenType.TIMESTAMP, 668 TokenType.TIMESTAMPTZ, 669 TokenType.TIMESTAMPLTZ, 670 *TIMES, 671 } 672 673 SET_OPERATIONS = { 674 TokenType.UNION, 675 TokenType.INTERSECT, 676 TokenType.EXCEPT, 677 } 678 679 JOIN_METHODS = { 680 TokenType.ASOF, 681 TokenType.NATURAL, 682 TokenType.POSITIONAL, 683 } 684 685 JOIN_SIDES = { 686 TokenType.LEFT, 687 TokenType.RIGHT, 688 TokenType.FULL, 689 } 690 691 JOIN_KINDS = { 692 TokenType.ANTI, 693 TokenType.CROSS, 694 TokenType.INNER, 695 TokenType.OUTER, 696 TokenType.SEMI, 697 TokenType.STRAIGHT_JOIN, 698 } 699 700 JOIN_HINTS: t.Set[str] = set() 701 702 LAMBDAS = { 703 TokenType.ARROW: lambda self, expressions: self.expression( 704 exp.Lambda, 705 this=self._replace_lambda( 706 self._parse_assignment(), 707 expressions, 708 ), 709 expressions=expressions, 710 ), 711 TokenType.FARROW: lambda self, expressions: self.expression( 712 exp.Kwarg, 713 this=exp.var(expressions[0].name), 714 expression=self._parse_assignment(), 715 ), 716 } 717 718 COLUMN_OPERATORS = { 719 
TokenType.DOT: None, 720 TokenType.DCOLON: lambda self, this, to: self.expression( 721 exp.Cast if self.STRICT_CAST else exp.TryCast, 722 this=this, 723 to=to, 724 ), 725 TokenType.ARROW: lambda self, this, path: self.expression( 726 exp.JSONExtract, 727 this=this, 728 expression=self.dialect.to_json_path(path), 729 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 730 ), 731 TokenType.DARROW: lambda self, this, path: self.expression( 732 exp.JSONExtractScalar, 733 this=this, 734 expression=self.dialect.to_json_path(path), 735 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 736 ), 737 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 738 exp.JSONBExtract, 739 this=this, 740 expression=path, 741 ), 742 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 743 exp.JSONBExtractScalar, 744 this=this, 745 expression=path, 746 ), 747 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 748 exp.JSONBContains, 749 this=this, 750 expression=key, 751 ), 752 } 753 754 EXPRESSION_PARSERS = { 755 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 756 exp.Column: lambda self: self._parse_column(), 757 exp.Condition: lambda self: self._parse_assignment(), 758 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 759 exp.Expression: lambda self: self._parse_expression(), 760 exp.From: lambda self: self._parse_from(joins=True), 761 exp.Group: lambda self: self._parse_group(), 762 exp.Having: lambda self: self._parse_having(), 763 exp.Hint: lambda self: self._parse_hint_body(), 764 exp.Identifier: lambda self: self._parse_id_var(), 765 exp.Join: lambda self: self._parse_join(), 766 exp.Lambda: lambda self: self._parse_lambda(), 767 exp.Lateral: lambda self: self._parse_lateral(), 768 exp.Limit: lambda self: self._parse_limit(), 769 exp.Offset: lambda self: self._parse_offset(), 770 exp.Order: lambda self: self._parse_order(), 771 exp.Ordered: lambda self: self._parse_ordered(), 772 exp.Properties: lambda self: self._parse_properties(), 773 exp.Qualify: lambda self: self._parse_qualify(), 774 exp.Returning: lambda self: self._parse_returning(), 775 exp.Select: lambda self: self._parse_select(), 776 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 777 exp.Table: lambda self: self._parse_table_parts(), 778 exp.TableAlias: lambda self: self._parse_table_alias(), 779 exp.Tuple: lambda self: self._parse_value(), 780 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 781 exp.Where: lambda self: self._parse_where(), 782 exp.Window: lambda self: self._parse_named_window(), 783 exp.With: lambda self: self._parse_with(), 784 "JOIN_TYPE": lambda self: self._parse_join_parts(), 785 } 786 787 STATEMENT_PARSERS = { 788 TokenType.ALTER: lambda self: self._parse_alter(), 789 TokenType.BEGIN: lambda self: self._parse_transaction(), 790 TokenType.CACHE: lambda self: self._parse_cache(), 791 TokenType.COMMENT: lambda self: self._parse_comment(), 792 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 793 TokenType.COPY: lambda self: self._parse_copy(), 794 TokenType.CREATE: lambda self: self._parse_create(), 795 TokenType.DELETE: lambda self: self._parse_delete(), 796 TokenType.DESC: lambda self: self._parse_describe(), 797 TokenType.DESCRIBE: lambda self: self._parse_describe(), 798 TokenType.DROP: lambda self: self._parse_drop(), 799 TokenType.GRANT: lambda self: self._parse_grant(), 800 TokenType.INSERT: lambda self: self._parse_insert(), 801 TokenType.KILL: lambda self: 
self._parse_kill(), 802 TokenType.LOAD: lambda self: self._parse_load(), 803 TokenType.MERGE: lambda self: self._parse_merge(), 804 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 805 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 806 TokenType.REFRESH: lambda self: self._parse_refresh(), 807 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 808 TokenType.SET: lambda self: self._parse_set(), 809 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 810 TokenType.UNCACHE: lambda self: self._parse_uncache(), 811 TokenType.UPDATE: lambda self: self._parse_update(), 812 TokenType.USE: lambda self: self.expression( 813 exp.Use, 814 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 815 this=self._parse_table(schema=False), 816 ), 817 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 818 } 819 820 UNARY_PARSERS = { 821 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 822 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 823 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 824 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 825 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 826 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 827 } 828 829 STRING_PARSERS = { 830 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 831 exp.RawString, this=token.text 832 ), 833 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 834 exp.National, this=token.text 835 ), 836 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 837 TokenType.STRING: lambda self, token: self.expression( 838 exp.Literal, this=token.text, is_string=True 839 ), 840 TokenType.UNICODE_STRING: lambda self, token: self.expression( 841 exp.UnicodeString, 842 this=token.text, 843 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 844 ), 845 } 846 847 NUMERIC_PARSERS = { 848 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 849 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 850 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 851 TokenType.NUMBER: lambda self, token: self.expression( 852 exp.Literal, this=token.text, is_string=False 853 ), 854 } 855 856 PRIMARY_PARSERS = { 857 **STRING_PARSERS, 858 **NUMERIC_PARSERS, 859 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 860 TokenType.NULL: lambda self, _: self.expression(exp.Null), 861 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 862 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 863 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 864 TokenType.STAR: lambda self, _: self._parse_star_ops(), 865 } 866 867 PLACEHOLDER_PARSERS = { 868 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 869 TokenType.PARAMETER: lambda self: self._parse_parameter(), 870 TokenType.COLON: lambda self: ( 871 self.expression(exp.Placeholder, this=self._prev.text) 872 if self._match_set(self.ID_VAR_TOKENS) 873 else None 874 ), 875 } 876 877 RANGE_PARSERS = { 878 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 879 TokenType.BETWEEN: lambda self, this: 
self._parse_between(this), 880 TokenType.GLOB: binary_range_parser(exp.Glob), 881 TokenType.ILIKE: binary_range_parser(exp.ILike), 882 TokenType.IN: lambda self, this: self._parse_in(this), 883 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 884 TokenType.IS: lambda self, this: self._parse_is(this), 885 TokenType.LIKE: binary_range_parser(exp.Like), 886 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 887 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 888 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 889 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 890 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 891 } 892 893 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 894 "ALLOWED_VALUES": lambda self: self.expression( 895 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 896 ), 897 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 898 "AUTO": lambda self: self._parse_auto_property(), 899 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 900 "BACKUP": lambda self: self.expression( 901 exp.BackupProperty, this=self._parse_var(any_token=True) 902 ), 903 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 904 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 905 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 906 "CHECKSUM": lambda self: self._parse_checksum(), 907 "CLUSTER BY": lambda self: self._parse_cluster(), 908 "CLUSTERED": lambda self: self._parse_clustered_by(), 909 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 910 exp.CollateProperty, **kwargs 911 ), 912 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 913 "CONTAINS": lambda self: self._parse_contains_property(), 914 "COPY": lambda self: self._parse_copy_property(), 915 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 916 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 917 "DEFINER": lambda self: self._parse_definer(), 918 "DETERMINISTIC": lambda self: self.expression( 919 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 920 ), 921 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 922 "DUPLICATE": lambda self: self._parse_duplicate(), 923 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 924 "DISTKEY": lambda self: self._parse_distkey(), 925 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 926 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 927 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 928 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 929 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 930 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 931 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 932 "FREESPACE": lambda self: self._parse_freespace(), 933 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 934 "HEAP": lambda self: self.expression(exp.HeapProperty), 935 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 936 "IMMUTABLE": lambda self: self.expression( 937 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 938 ), 939 "INHERITS": lambda self: self.expression( 940 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 941 ), 942 
"INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 943 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 944 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 945 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 946 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 947 "LIKE": lambda self: self._parse_create_like(), 948 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 949 "LOCK": lambda self: self._parse_locking(), 950 "LOCKING": lambda self: self._parse_locking(), 951 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 952 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 953 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 954 "MODIFIES": lambda self: self._parse_modifies_property(), 955 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 956 "NO": lambda self: self._parse_no_property(), 957 "ON": lambda self: self._parse_on_property(), 958 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 959 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 960 "PARTITION": lambda self: self._parse_partitioned_of(), 961 "PARTITION BY": lambda self: self._parse_partitioned_by(), 962 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 963 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 964 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 965 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 966 "READS": lambda self: self._parse_reads_property(), 967 "REMOTE": lambda self: self._parse_remote_with_connection(), 968 "RETURNS": lambda self: self._parse_returns(), 969 "STRICT": lambda self: self.expression(exp.StrictProperty), 970 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 971 "ROW": lambda self: self._parse_row(), 972 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 973 "SAMPLE": lambda self: self.expression( 974 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 975 ), 976 "SECURE": lambda self: self.expression(exp.SecureProperty), 977 "SECURITY": lambda self: self._parse_security(), 978 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 979 "SETTINGS": lambda self: self._parse_settings_property(), 980 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 981 "SORTKEY": lambda self: self._parse_sortkey(), 982 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 983 "STABLE": lambda self: self.expression( 984 exp.StabilityProperty, this=exp.Literal.string("STABLE") 985 ), 986 "STORED": lambda self: self._parse_stored(), 987 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 988 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 989 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 990 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 991 "TO": lambda self: self._parse_to_table(), 992 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 993 "TRANSFORM": lambda self: self.expression( 994 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 995 ), 996 "TTL": lambda self: self._parse_ttl(), 997 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 998 "UNLOGGED": lambda self: 
self.expression(exp.UnloggedProperty), 999 "VOLATILE": lambda self: self._parse_volatile_property(), 1000 "WITH": lambda self: self._parse_with_property(), 1001 } 1002 1003 CONSTRAINT_PARSERS = { 1004 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1005 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1006 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1007 "CHARACTER SET": lambda self: self.expression( 1008 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1009 ), 1010 "CHECK": lambda self: self.expression( 1011 exp.CheckColumnConstraint, 1012 this=self._parse_wrapped(self._parse_assignment), 1013 enforced=self._match_text_seq("ENFORCED"), 1014 ), 1015 "COLLATE": lambda self: self.expression( 1016 exp.CollateColumnConstraint, 1017 this=self._parse_identifier() or self._parse_column(), 1018 ), 1019 "COMMENT": lambda self: self.expression( 1020 exp.CommentColumnConstraint, this=self._parse_string() 1021 ), 1022 "COMPRESS": lambda self: self._parse_compress(), 1023 "CLUSTERED": lambda self: self.expression( 1024 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1025 ), 1026 "NONCLUSTERED": lambda self: self.expression( 1027 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1028 ), 1029 "DEFAULT": lambda self: self.expression( 1030 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1031 ), 1032 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1033 "EPHEMERAL": lambda self: self.expression( 1034 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1035 ), 1036 "EXCLUDE": lambda self: self.expression( 1037 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1038 ), 1039 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1040 "FORMAT": lambda self: self.expression( 1041 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1042 ), 1043 "GENERATED": lambda self: self._parse_generated_as_identity(), 1044 "IDENTITY": lambda self: self._parse_auto_increment(), 1045 "INLINE": lambda self: self._parse_inline(), 1046 "LIKE": lambda self: self._parse_create_like(), 1047 "NOT": lambda self: self._parse_not_constraint(), 1048 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1049 "ON": lambda self: ( 1050 self._match(TokenType.UPDATE) 1051 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1052 ) 1053 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1054 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1055 "PERIOD": lambda self: self._parse_period_for_system_time(), 1056 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1057 "REFERENCES": lambda self: self._parse_references(match=False), 1058 "TITLE": lambda self: self.expression( 1059 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1060 ), 1061 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1062 "UNIQUE": lambda self: self._parse_unique(), 1063 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1064 "WATERMARK": lambda self: self.expression( 1065 exp.WatermarkColumnConstraint, 1066 this=self._match(TokenType.FOR) and self._parse_column(), 1067 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1068 ), 1069 "WITH": lambda self: self.expression( 1070 exp.Properties, expressions=self._parse_wrapped_properties() 
1071 ), 1072 } 1073 1074 ALTER_PARSERS = { 1075 "ADD": lambda self: self._parse_alter_table_add(), 1076 "AS": lambda self: self._parse_select(), 1077 "ALTER": lambda self: self._parse_alter_table_alter(), 1078 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1079 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1080 "DROP": lambda self: self._parse_alter_table_drop(), 1081 "RENAME": lambda self: self._parse_alter_table_rename(), 1082 "SET": lambda self: self._parse_alter_table_set(), 1083 "SWAP": lambda self: self.expression( 1084 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1085 ), 1086 } 1087 1088 ALTER_ALTER_PARSERS = { 1089 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1090 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1091 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1092 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1093 } 1094 1095 SCHEMA_UNNAMED_CONSTRAINTS = { 1096 "CHECK", 1097 "EXCLUDE", 1098 "FOREIGN KEY", 1099 "LIKE", 1100 "PERIOD", 1101 "PRIMARY KEY", 1102 "UNIQUE", 1103 "WATERMARK", 1104 } 1105 1106 NO_PAREN_FUNCTION_PARSERS = { 1107 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1108 "CASE": lambda self: self._parse_case(), 1109 "CONNECT_BY_ROOT": lambda self: self.expression( 1110 exp.ConnectByRoot, this=self._parse_column() 1111 ), 1112 "IF": lambda self: self._parse_if(), 1113 } 1114 1115 INVALID_FUNC_NAME_TOKENS = { 1116 TokenType.IDENTIFIER, 1117 TokenType.STRING, 1118 } 1119 1120 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1121 1122 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1123 1124 FUNCTION_PARSERS = { 1125 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1126 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1127 "DECODE": lambda self: self._parse_decode(), 1128 "EXTRACT": lambda self: self._parse_extract(), 1129 "GAP_FILL": lambda self: self._parse_gap_fill(), 1130 "JSON_OBJECT": lambda self: self._parse_json_object(), 1131 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1132 "JSON_TABLE": lambda self: self._parse_json_table(), 1133 "MATCH": lambda self: self._parse_match_against(), 1134 "NORMALIZE": lambda self: self._parse_normalize(), 1135 "OPENJSON": lambda self: self._parse_open_json(), 1136 "OVERLAY": lambda self: self._parse_overlay(), 1137 "POSITION": lambda self: self._parse_position(), 1138 "PREDICT": lambda self: self._parse_predict(), 1139 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1140 "STRING_AGG": lambda self: self._parse_string_agg(), 1141 "SUBSTRING": lambda self: self._parse_substring(), 1142 "TRIM": lambda self: self._parse_trim(), 1143 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1144 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1145 } 1146 1147 QUERY_MODIFIER_PARSERS = { 1148 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1149 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1150 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1151 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1152 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1153 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1154 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1155 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1156 
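        # Each parser below returns a ("modifier_key", parsed_node) pair that
        # _parse_query_modifiers sets on the enclosing query expression. Note
        # that FETCH is normalized into the same "limit" slot as LIMIT, and
        # USING into the same "sample" slot as TABLE_SAMPLE.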
TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1157 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1158 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1159 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1160 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1161 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1162 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1163 TokenType.CLUSTER_BY: lambda self: ( 1164 "cluster", 1165 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1166 ), 1167 TokenType.DISTRIBUTE_BY: lambda self: ( 1168 "distribute", 1169 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1170 ), 1171 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1172 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1173 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1174 } 1175 1176 SET_PARSERS = { 1177 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1178 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1179 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1180 "TRANSACTION": lambda self: self._parse_set_transaction(), 1181 } 1182 1183 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1184 1185 TYPE_LITERAL_PARSERS = { 1186 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1187 } 1188 1189 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1190 1191 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1192 1193 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1194 1195 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1196 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1197 "ISOLATION": ( 1198 ("LEVEL", "REPEATABLE", "READ"), 1199 ("LEVEL", "READ", "COMMITTED"), 1200 ("LEVEL", "READ", "UNCOMMITTED"), 1201 ("LEVEL", "SERIALIZABLE"), 1202 ), 1203 "READ": ("WRITE", "ONLY"), 1204 } 1205 1206 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1207 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1208 ) 1209 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1210 1211 CREATE_SEQUENCE: OPTIONS_TYPE = { 1212 "SCALE": ("EXTEND", "NOEXTEND"), 1213 "SHARD": ("EXTEND", "NOEXTEND"), 1214 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1215 **dict.fromkeys( 1216 ( 1217 "SESSION", 1218 "GLOBAL", 1219 "KEEP", 1220 "NOKEEP", 1221 "ORDER", 1222 "NOORDER", 1223 "NOCACHE", 1224 "CYCLE", 1225 "NOCYCLE", 1226 "NOMINVALUE", 1227 "NOMAXVALUE", 1228 "NOSCALE", 1229 "NOSHARD", 1230 ), 1231 tuple(), 1232 ), 1233 } 1234 1235 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1236 1237 USABLES: OPTIONS_TYPE = dict.fromkeys( 1238 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1239 ) 1240 1241 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1242 1243 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1244 "TYPE": ("EVOLUTION",), 1245 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1246 } 1247 1248 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1249 1250 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1251 1252 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1253 "NOT": ("ENFORCED",), 1254 "MATCH": ( 1255 "FULL", 1256 "PARTIAL", 1257 "SIMPLE", 1258 ), 1259
"INITIALLY": ("DEFERRED", "IMMEDIATE"), 1260 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1261 } 1262 1263 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1264 1265 CLONE_KEYWORDS = {"CLONE", "COPY"} 1266 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1267 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1268 1269 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1270 1271 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1272 1273 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1274 1275 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1276 1277 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1278 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1279 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1280 1281 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1282 1283 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1284 1285 ADD_CONSTRAINT_TOKENS = { 1286 TokenType.CONSTRAINT, 1287 TokenType.FOREIGN_KEY, 1288 TokenType.INDEX, 1289 TokenType.KEY, 1290 TokenType.PRIMARY_KEY, 1291 TokenType.UNIQUE, 1292 } 1293 1294 DISTINCT_TOKENS = {TokenType.DISTINCT} 1295 1296 NULL_TOKENS = {TokenType.NULL} 1297 1298 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1299 1300 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1301 1302 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1303 1304 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1305 1306 ODBC_DATETIME_LITERALS = { 1307 "d": exp.Date, 1308 "t": exp.Time, 1309 "ts": exp.Timestamp, 1310 } 1311 1312 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1313 1314 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1315 1316 # The style options for the DESCRIBE statement 1317 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1318 1319 OPERATION_MODIFIERS: t.Set[str] = set() 1320 1321 STRICT_CAST = True 1322 1323 PREFIXED_PIVOT_COLUMNS = False 1324 IDENTIFY_PIVOT_STRINGS = False 1325 1326 LOG_DEFAULTS_TO_LN = False 1327 1328 # Whether ADD is present for each column added by ALTER TABLE 1329 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1330 1331 # Whether the table sample clause expects CSV syntax 1332 TABLESAMPLE_CSV = False 1333 1334 # The default method used for table sampling 1335 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1336 1337 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1338 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1339 1340 # Whether the TRIM function expects the characters to trim as its first argument 1341 TRIM_PATTERN_FIRST = False 1342 1343 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1344 STRING_ALIASES = False 1345 1346 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1347 MODIFIERS_ATTACHED_TO_SET_OP = True 1348 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1349 1350 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1351 NO_PAREN_IF_COMMANDS = True 1352 1353 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1354 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1355 1356 # Whether the `:` operator is used to extract a value from a VARIANT column 1357 COLON_IS_VARIANT_EXTRACT = False 1358 1359 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1360 # If this is True and '(' is not found, the keyword will be treated as an identifier 1361 VALUES_FOLLOWED_BY_PAREN = True 1362 1363 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1364 SUPPORTS_IMPLICIT_UNNEST = False 1365 1366 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1367 INTERVAL_SPANS = True 1368 1369 # Whether a PARTITION clause can follow a table reference 1370 SUPPORTS_PARTITION_SELECTION = False 1371 1372 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1373 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1374 1375 __slots__ = ( 1376 "error_level", 1377 "error_message_context", 1378 "max_errors", 1379 "dialect", 1380 "sql", 1381 "errors", 1382 "_tokens", 1383 "_index", 1384 "_curr", 1385 "_next", 1386 "_prev", 1387 "_prev_comments", 1388 ) 1389 1390 # Autofilled 1391 SHOW_TRIE: t.Dict = {} 1392 SET_TRIE: t.Dict = {} 1393 1394 def __init__( 1395 self, 1396 error_level: t.Optional[ErrorLevel] = None, 1397 error_message_context: int = 100, 1398 max_errors: int = 3, 1399 dialect: DialectType = None, 1400 ): 1401 from sqlglot.dialects import Dialect 1402 1403 self.error_level = error_level or ErrorLevel.IMMEDIATE 1404 self.error_message_context = error_message_context 1405 self.max_errors = max_errors 1406 self.dialect = Dialect.get_or_raise(dialect) 1407 self.reset() 1408 1409 def reset(self): 1410 self.sql = "" 1411 self.errors = [] 1412 self._tokens = [] 1413 self._index = 0 1414 self._curr = None 1415 self._next = None 1416 self._prev = None 1417 self._prev_comments = None 1418 1419 def parse( 1420 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1421 ) -> t.List[t.Optional[exp.Expression]]: 1422 """ 1423 Parses a list of tokens and returns a list of syntax trees, one tree 1424 per parsed SQL statement. 1425 1426 Args: 1427 raw_tokens: The list of tokens. 1428 sql: The original SQL string, used to produce helpful debug messages. 1429 1430 Returns: 1431 The list of the produced syntax trees. 1432 """ 1433 return self._parse( 1434 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1435 ) 1436 1437 def parse_into( 1438 self, 1439 expression_types: exp.IntoType, 1440 raw_tokens: t.List[Token], 1441 sql: t.Optional[str] = None, 1442 ) -> t.List[t.Optional[exp.Expression]]: 1443 """ 1444 Parses a list of tokens into a given Expression type. If a collection of Expression 1445 types is given instead, this method will try to parse the token list into each one 1446 of them, stopping at the first for which the parsing succeeds. 1447 1448 Args: 1449 expression_types: The expression type(s) to try and parse the token list into. 1450 raw_tokens: The list of tokens. 1451 sql: The original SQL string, used to produce helpful debug messages. 1452 1453 Returns: 1454 The target Expression. 
1455 """ 1456 errors = [] 1457 for expression_type in ensure_list(expression_types): 1458 parser = self.EXPRESSION_PARSERS.get(expression_type) 1459 if not parser: 1460 raise TypeError(f"No parser registered for {expression_type}") 1461 1462 try: 1463 return self._parse(parser, raw_tokens, sql) 1464 except ParseError as e: 1465 e.errors[0]["into_expression"] = expression_type 1466 errors.append(e) 1467 1468 raise ParseError( 1469 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1470 errors=merge_errors(errors), 1471 ) from errors[-1] 1472 1473 def _parse( 1474 self, 1475 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1476 raw_tokens: t.List[Token], 1477 sql: t.Optional[str] = None, 1478 ) -> t.List[t.Optional[exp.Expression]]: 1479 self.reset() 1480 self.sql = sql or "" 1481 1482 total = len(raw_tokens) 1483 chunks: t.List[t.List[Token]] = [[]] 1484 1485 for i, token in enumerate(raw_tokens): 1486 if token.token_type == TokenType.SEMICOLON: 1487 if token.comments: 1488 chunks.append([token]) 1489 1490 if i < total - 1: 1491 chunks.append([]) 1492 else: 1493 chunks[-1].append(token) 1494 1495 expressions = [] 1496 1497 for tokens in chunks: 1498 self._index = -1 1499 self._tokens = tokens 1500 self._advance() 1501 1502 expressions.append(parse_method(self)) 1503 1504 if self._index < len(self._tokens): 1505 self.raise_error("Invalid expression / Unexpected token") 1506 1507 self.check_errors() 1508 1509 return expressions 1510 1511 def check_errors(self) -> None: 1512 """Logs or raises any found errors, depending on the chosen error level setting.""" 1513 if self.error_level == ErrorLevel.WARN: 1514 for error in self.errors: 1515 logger.error(str(error)) 1516 elif self.error_level == ErrorLevel.RAISE and self.errors: 1517 raise ParseError( 1518 concat_messages(self.errors, self.max_errors), 1519 errors=merge_errors(self.errors), 1520 ) 1521 1522 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1523 """ 1524 Appends an error in the list of recorded errors or raises it, depending on the chosen 1525 error level setting. 1526 """ 1527 token = token or self._curr or self._prev or Token.string("") 1528 start = token.start 1529 end = token.end + 1 1530 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1531 highlight = self.sql[start:end] 1532 end_context = self.sql[end : end + self.error_message_context] 1533 1534 error = ParseError.new( 1535 f"{message}. Line {token.line}, Col: {token.col}.\n" 1536 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1537 description=message, 1538 line=token.line, 1539 col=token.col, 1540 start_context=start_context, 1541 highlight=highlight, 1542 end_context=end_context, 1543 ) 1544 1545 if self.error_level == ErrorLevel.IMMEDIATE: 1546 raise error 1547 1548 self.errors.append(error) 1549 1550 def expression( 1551 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1552 ) -> E: 1553 """ 1554 Creates a new, validated Expression. 1555 1556 Args: 1557 exp_class: The expression class to instantiate. 1558 comments: An optional list of comments to attach to the expression. 1559 kwargs: The arguments to set for the expression along with their respective values. 1560 1561 Returns: 1562 The target expression. 
1563 """ 1564 instance = exp_class(**kwargs) 1565 instance.add_comments(comments) if comments else self._add_comments(instance) 1566 return self.validate_expression(instance) 1567 1568 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1569 if expression and self._prev_comments: 1570 expression.add_comments(self._prev_comments) 1571 self._prev_comments = None 1572 1573 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1574 """ 1575 Validates an Expression, making sure that all its mandatory arguments are set. 1576 1577 Args: 1578 expression: The expression to validate. 1579 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1580 1581 Returns: 1582 The validated expression. 1583 """ 1584 if self.error_level != ErrorLevel.IGNORE: 1585 for error_message in expression.error_messages(args): 1586 self.raise_error(error_message) 1587 1588 return expression 1589 1590 def _find_sql(self, start: Token, end: Token) -> str: 1591 return self.sql[start.start : end.end + 1] 1592 1593 def _is_connected(self) -> bool: 1594 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1595 1596 def _advance(self, times: int = 1) -> None: 1597 self._index += times 1598 self._curr = seq_get(self._tokens, self._index) 1599 self._next = seq_get(self._tokens, self._index + 1) 1600 1601 if self._index > 0: 1602 self._prev = self._tokens[self._index - 1] 1603 self._prev_comments = self._prev.comments 1604 else: 1605 self._prev = None 1606 self._prev_comments = None 1607 1608 def _retreat(self, index: int) -> None: 1609 if index != self._index: 1610 self._advance(index - self._index) 1611 1612 def _warn_unsupported(self) -> None: 1613 if len(self._tokens) <= 1: 1614 return 1615 1616 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1617 # interested in emitting a warning for the one currently being processed. 1618 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1619 1620 logger.warning( 1621 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1622 ) 1623 1624 def _parse_command(self) -> exp.Command: 1625 self._warn_unsupported() 1626 return self.expression( 1627 exp.Command, 1628 comments=self._prev_comments, 1629 this=self._prev.text.upper(), 1630 expression=self._parse_string(), 1631 ) 1632 1633 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1634 """ 1635 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1636 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1637 solve this by setting & resetting the parser state accordingly. 1638 """ 1639 index = self._index 1640 error_level = self.error_level 1641 1642 self.error_level = ErrorLevel.IMMEDIATE 1643 try: 1644 this = parse_method() 1645 except ParseError: 1646 this = None 1647 finally: 1648 if not this or retreat: 1649 self._retreat(index) 1650 self.error_level = error_level 1651 1652 return this 1653 1654 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1655 start = self._prev 1656 exists = self._parse_exists() if allow_exists else None 1657 1658 self._match(TokenType.ON) 1659 1660 materialized = self._match_text_seq("MATERIALIZED") 1661 kind = self._match_set(self.CREATABLES) and self._prev 1662 if not kind: 1663 return self._parse_as_command(start) 1664 1665 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1666 this = self._parse_user_defined_function(kind=kind.token_type) 1667 elif kind.token_type == TokenType.TABLE: 1668 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1669 elif kind.token_type == TokenType.COLUMN: 1670 this = self._parse_column() 1671 else: 1672 this = self._parse_id_var() 1673 1674 self._match(TokenType.IS) 1675 1676 return self.expression( 1677 exp.Comment, 1678 this=this, 1679 kind=kind.text, 1680 expression=self._parse_string(), 1681 exists=exists, 1682 materialized=materialized, 1683 ) 1684 1685 def _parse_to_table( 1686 self, 1687 ) -> exp.ToTableProperty: 1688 table = self._parse_table_parts(schema=True) 1689 return self.expression(exp.ToTableProperty, this=table) 1690 1691 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1692 def _parse_ttl(self) -> exp.Expression: 1693 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1694 this = self._parse_bitwise() 1695 1696 if self._match_text_seq("DELETE"): 1697 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1698 if self._match_text_seq("RECOMPRESS"): 1699 return self.expression( 1700 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1701 ) 1702 if self._match_text_seq("TO", "DISK"): 1703 return self.expression( 1704 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1705 ) 1706 if self._match_text_seq("TO", "VOLUME"): 1707 return self.expression( 1708 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1709 ) 1710 1711 return this 1712 1713 expressions = self._parse_csv(_parse_ttl_action) 1714 where = self._parse_where() 1715 group = self._parse_group() 1716 1717 aggregates = None 1718 if group and self._match(TokenType.SET): 1719 aggregates = self._parse_csv(self._parse_set_item) 1720 1721 return self.expression( 1722 exp.MergeTreeTTL, 1723 expressions=expressions, 1724 where=where, 1725 group=group, 1726 aggregates=aggregates, 1727 ) 1728 1729 def _parse_statement(self) -> t.Optional[exp.Expression]: 1730 if self._curr is None: 1731 return None 1732 1733 if self._match_set(self.STATEMENT_PARSERS): 1734 comments = self._prev_comments 1735 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1736 stmt.add_comments(comments, prepend=True) 1737 return stmt 1738 1739 if self._match_set(self.dialect.tokenizer.COMMANDS): 1740 return self._parse_command() 1741 1742 expression = self._parse_expression() 1743 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1744 return self._parse_query_modifiers(expression)
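    # Editor's note: a minimal usage sketch, not part of the original source. It
    # assumes the Dialect helpers tokenize() and parser() available in recent
    # sqlglot releases; only parse() and parse_into() are defined in this module.
    #
    #     from sqlglot import exp
    #     from sqlglot.dialects import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     sql = "SELECT 1; SELECT 2"
    #     trees = dialect.parser().parse(dialect.tokenize(sql), sql=sql)
    #     # -> one syntax tree per semicolon-separated statement
    #
    #     # parse_into targets a registered expression type directly:
    #     tokens = dialect.tokenize("SELECT 1")
    #     select = dialect.parser().parse_into(exp.Select, tokens, sql="SELECT 1")[0]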
1745 1746 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1747 start = self._prev 1748 temporary = self._match(TokenType.TEMPORARY) 1749 materialized = self._match_text_seq("MATERIALIZED") 1750 1751 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1752 if not kind: 1753 return self._parse_as_command(start) 1754 1755 concurrently = self._match_text_seq("CONCURRENTLY") 1756 if_exists = exists or self._parse_exists() 1757 1758 if kind == "COLUMN": 1759 this = self._parse_column() 1760 else: 1761 this = self._parse_table_parts( 1762 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1763 ) 1764 1765 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1766 1767 if self._match(TokenType.L_PAREN, advance=False): 1768 expressions = self._parse_wrapped_csv(self._parse_types) 1769 else: 1770 expressions = None 1771 1772 return self.expression( 1773 exp.Drop, 1774 exists=if_exists, 1775 this=this, 1776 expressions=expressions, 1777 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1778 temporary=temporary, 1779 materialized=materialized, 1780 cascade=self._match_text_seq("CASCADE"), 1781 constraints=self._match_text_seq("CONSTRAINTS"), 1782 purge=self._match_text_seq("PURGE"), 1783 cluster=cluster, 1784 concurrently=concurrently, 1785 ) 1786 1787 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1788 return ( 1789 self._match_text_seq("IF") 1790 and (not not_ or self._match(TokenType.NOT)) 1791 and self._match(TokenType.EXISTS) 1792 ) 1793 1794 def _parse_create(self) -> exp.Create | exp.Command: 1795 # Note: this can't be None because we've matched a statement parser 1796 start = self._prev 1797 1798 replace = ( 1799 start.token_type == TokenType.REPLACE 1800 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1801 or self._match_pair(TokenType.OR, TokenType.ALTER) 1802 ) 1803 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1804 1805 unique = self._match(TokenType.UNIQUE) 1806 1807 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1808 clustered = True 1809 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1810 "COLUMNSTORE" 1811 ): 1812 clustered = False 1813 else: 1814 clustered = None 1815 1816 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1817 self._advance() 1818 1819 properties = None 1820 create_token = self._match_set(self.CREATABLES) and self._prev 1821 1822 if not create_token: 1823 # exp.Properties.Location.POST_CREATE 1824 properties = self._parse_properties() 1825 create_token = self._match_set(self.CREATABLES) and self._prev 1826 1827 if not properties or not create_token: 1828 return self._parse_as_command(start) 1829 1830 concurrently = self._match_text_seq("CONCURRENTLY") 1831 exists = self._parse_exists(not_=True) 1832 this = None 1833 expression: t.Optional[exp.Expression] = None 1834 indexes = None 1835 no_schema_binding = None 1836 begin = None 1837 end = None 1838 clone = None 1839 1840 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1841 nonlocal properties 1842 if properties and temp_props: 1843 properties.expressions.extend(temp_props.expressions) 1844 elif temp_props: 1845 properties = temp_props 1846 1847 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1848 this = self._parse_user_defined_function(kind=create_token.token_type) 1849 1850 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1851 
extend_props(self._parse_properties()) 1852 1853 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1854 extend_props(self._parse_properties()) 1855 1856 if not expression: 1857 if self._match(TokenType.COMMAND): 1858 expression = self._parse_as_command(self._prev) 1859 else: 1860 begin = self._match(TokenType.BEGIN) 1861 return_ = self._match_text_seq("RETURN") 1862 1863 if self._match(TokenType.STRING, advance=False): 1864 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1865 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1866 expression = self._parse_string() 1867 extend_props(self._parse_properties()) 1868 else: 1869 expression = self._parse_user_defined_function_expression() 1870 1871 end = self._match_text_seq("END") 1872 1873 if return_: 1874 expression = self.expression(exp.Return, this=expression) 1875 elif create_token.token_type == TokenType.INDEX: 1876 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1877 if not self._match(TokenType.ON): 1878 index = self._parse_id_var() 1879 anonymous = False 1880 else: 1881 index = None 1882 anonymous = True 1883 1884 this = self._parse_index(index=index, anonymous=anonymous) 1885 elif create_token.token_type in self.DB_CREATABLES: 1886 table_parts = self._parse_table_parts( 1887 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1888 ) 1889 1890 # exp.Properties.Location.POST_NAME 1891 self._match(TokenType.COMMA) 1892 extend_props(self._parse_properties(before=True)) 1893 1894 this = self._parse_schema(this=table_parts) 1895 1896 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1897 extend_props(self._parse_properties()) 1898 1899 self._match(TokenType.ALIAS) 1900 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1901 # exp.Properties.Location.POST_ALIAS 1902 extend_props(self._parse_properties()) 1903 1904 if create_token.token_type == TokenType.SEQUENCE: 1905 expression = self._parse_types() 1906 extend_props(self._parse_properties()) 1907 else: 1908 expression = self._parse_ddl_select() 1909 1910 if create_token.token_type == TokenType.TABLE: 1911 # exp.Properties.Location.POST_EXPRESSION 1912 extend_props(self._parse_properties()) 1913 1914 indexes = [] 1915 while True: 1916 index = self._parse_index() 1917 1918 # exp.Properties.Location.POST_INDEX 1919 extend_props(self._parse_properties()) 1920 if not index: 1921 break 1922 else: 1923 self._match(TokenType.COMMA) 1924 indexes.append(index) 1925 elif create_token.token_type == TokenType.VIEW: 1926 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1927 no_schema_binding = True 1928 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1929 extend_props(self._parse_properties()) 1930 1931 shallow = self._match_text_seq("SHALLOW") 1932 1933 if self._match_texts(self.CLONE_KEYWORDS): 1934 copy = self._prev.text.lower() == "copy" 1935 clone = self.expression( 1936 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1937 ) 1938 1939 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1940 return self._parse_as_command(start) 1941 1942 create_kind_text = create_token.text.upper() 1943 return self.expression( 1944 exp.Create, 1945 this=this, 1946 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1947 replace=replace, 1948 refresh=refresh, 1949 unique=unique, 1950 expression=expression,
1951 exists=exists, 1952 properties=properties, 1953 indexes=indexes, 1954 no_schema_binding=no_schema_binding, 1955 begin=begin, 1956 end=end, 1957 clone=clone, 1958 concurrently=concurrently, 1959 clustered=clustered, 1960 ) 1961 1962 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1963 seq = exp.SequenceProperties() 1964 1965 options = [] 1966 index = self._index 1967 1968 while self._curr: 1969 self._match(TokenType.COMMA) 1970 if self._match_text_seq("INCREMENT"): 1971 self._match_text_seq("BY") 1972 self._match_text_seq("=") 1973 seq.set("increment", self._parse_term()) 1974 elif self._match_text_seq("MINVALUE"): 1975 seq.set("minvalue", self._parse_term()) 1976 elif self._match_text_seq("MAXVALUE"): 1977 seq.set("maxvalue", self._parse_term()) 1978 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1979 self._match_text_seq("=") 1980 seq.set("start", self._parse_term()) 1981 elif self._match_text_seq("CACHE"): 1982 # T-SQL allows empty CACHE which is initialized dynamically 1983 seq.set("cache", self._parse_number() or True) 1984 elif self._match_text_seq("OWNED", "BY"): 1985 # "OWNED BY NONE" is the default 1986 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1987 else: 1988 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1989 if opt: 1990 options.append(opt) 1991 else: 1992 break 1993 1994 seq.set("options", options if options else None) 1995 return None if self._index == index else seq 1996 1997 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1998 # Only used for Teradata currently 1999 self._match(TokenType.COMMA) 2000 2001 kwargs = { 2002 "no": self._match_text_seq("NO"), 2003 "dual": self._match_text_seq("DUAL"), 2004 "before": self._match_text_seq("BEFORE"), 2005 "default": self._match_text_seq("DEFAULT"), 2006 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2007 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2008 "after": self._match_text_seq("AFTER"), 2009 "minimum": self._match_texts(("MIN", "MINIMUM")), 2010 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2011 } 2012 2013 if self._match_texts(self.PROPERTY_PARSERS): 2014 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2015 try: 2016 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2017 except TypeError: 2018 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2019 2020 return None 2021 2022 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2023 return self._parse_wrapped_csv(self._parse_property) 2024 2025 def _parse_property(self) -> t.Optional[exp.Expression]: 2026 if self._match_texts(self.PROPERTY_PARSERS): 2027 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2028 2029 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2030 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2031 2032 if self._match_text_seq("COMPOUND", "SORTKEY"): 2033 return self._parse_sortkey(compound=True) 2034 2035 if self._match_text_seq("SQL", "SECURITY"): 2036 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2037 2038 index = self._index 2039 key = self._parse_column() 2040 2041 if not self._match(TokenType.EQ): 2042 self._retreat(index) 2043 return self._parse_sequence_properties() 2044 2045 # Transform the key into an exp.Dot if it's a dotted identifier wrapped in exp.Column, or into an exp.Var otherwise 2046 if isinstance(key, exp.Column): 2047 key =
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2048 2049 value = self._parse_bitwise() or self._parse_var(any_token=True) 2050 2051 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2052 if isinstance(value, exp.Column): 2053 value = exp.var(value.name) 2054 2055 return self.expression(exp.Property, this=key, value=value) 2056 2057 def _parse_stored(self) -> exp.FileFormatProperty: 2058 self._match(TokenType.ALIAS) 2059 2060 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2061 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2062 2063 return self.expression( 2064 exp.FileFormatProperty, 2065 this=( 2066 self.expression( 2067 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2068 ) 2069 if input_format or output_format 2070 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2071 ), 2072 ) 2073 2074 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2075 field = self._parse_field() 2076 if isinstance(field, exp.Identifier) and not field.quoted: 2077 field = exp.var(field) 2078 2079 return field 2080 2081 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2082 self._match(TokenType.EQ) 2083 self._match(TokenType.ALIAS) 2084 2085 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2086 2087 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2088 properties = [] 2089 while True: 2090 if before: 2091 prop = self._parse_property_before() 2092 else: 2093 prop = self._parse_property() 2094 if not prop: 2095 break 2096 for p in ensure_list(prop): 2097 properties.append(p) 2098 2099 if properties: 2100 return self.expression(exp.Properties, expressions=properties) 2101 2102 return None 2103 2104 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2105 return self.expression( 2106 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2107 ) 2108 2109 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2110 if self._match_texts(("DEFINER", "INVOKER")): 2111 security_specifier = self._prev.text.upper() 2112 return self.expression(exp.SecurityProperty, this=security_specifier) 2113 return None 2114 2115 def _parse_settings_property(self) -> exp.SettingsProperty: 2116 return self.expression( 2117 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2118 ) 2119 2120 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2121 if self._index >= 2: 2122 pre_volatile_token = self._tokens[self._index - 2] 2123 else: 2124 pre_volatile_token = None 2125 2126 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2127 return exp.VolatileProperty() 2128 2129 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2130 2131 def _parse_retention_period(self) -> exp.Var: 2132 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2133 number = self._parse_number() 2134 number_str = f"{number} " if number else "" 2135 unit = self._parse_var(any_token=True) 2136 return exp.var(f"{number_str}{unit}") 2137 2138 def _parse_system_versioning_property( 2139 self, with_: bool = False 2140 ) -> exp.WithSystemVersioningProperty: 2141 self._match(TokenType.EQ) 2142 prop = self.expression( 2143 exp.WithSystemVersioningProperty, 2144 **{ # type: ignore 2145 "on": 
True, 2146 "with": with_, 2147 }, 2148 ) 2149 2150 if self._match_text_seq("OFF"): 2151 prop.set("on", False) 2152 return prop 2153 2154 self._match(TokenType.ON) 2155 if self._match(TokenType.L_PAREN): 2156 while self._curr and not self._match(TokenType.R_PAREN): 2157 if self._match_text_seq("HISTORY_TABLE", "="): 2158 prop.set("this", self._parse_table_parts()) 2159 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2160 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2161 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2162 prop.set("retention_period", self._parse_retention_period()) 2163 2164 self._match(TokenType.COMMA) 2165 2166 return prop 2167 2168 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2169 self._match(TokenType.EQ) 2170 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2171 prop = self.expression(exp.DataDeletionProperty, on=on) 2172 2173 if self._match(TokenType.L_PAREN): 2174 while self._curr and not self._match(TokenType.R_PAREN): 2175 if self._match_text_seq("FILTER_COLUMN", "="): 2176 prop.set("filter_column", self._parse_column()) 2177 elif self._match_text_seq("RETENTION_PERIOD", "="): 2178 prop.set("retention_period", self._parse_retention_period()) 2179 2180 self._match(TokenType.COMMA) 2181 2182 return prop 2183 2184 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2185 kind = "HASH" 2186 expressions: t.Optional[t.List[exp.Expression]] = None 2187 if self._match_text_seq("BY", "HASH"): 2188 expressions = self._parse_wrapped_csv(self._parse_id_var) 2189 elif self._match_text_seq("BY", "RANDOM"): 2190 kind = "RANDOM" 2191 2192 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2193 buckets: t.Optional[exp.Expression] = None 2194 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2195 buckets = self._parse_number() 2196 2197 return self.expression( 2198 exp.DistributedByProperty, 2199 expressions=expressions, 2200 kind=kind, 2201 buckets=buckets, 2202 order=self._parse_order(), 2203 ) 2204 2205 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2206 self._match_text_seq("KEY") 2207 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2208 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2209 2210 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2211 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2212 prop = self._parse_system_versioning_property(with_=True) 2213 self._match_r_paren() 2214 return prop 2215 2216 if self._match(TokenType.L_PAREN, advance=False): 2217 return self._parse_wrapped_properties() 2218 2219 if self._match_text_seq("JOURNAL"): 2220 return self._parse_withjournaltable() 2221 2222 if self._match_texts(self.VIEW_ATTRIBUTES): 2223 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2224 2225 if self._match_text_seq("DATA"): 2226 return self._parse_withdata(no=False) 2227 elif self._match_text_seq("NO", "DATA"): 2228 return self._parse_withdata(no=True) 2229 2230 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2231 return self._parse_serde_properties(with_=True) 2232 2233 if self._match(TokenType.SCHEMA): 2234 return self.expression( 2235 exp.WithSchemaBindingProperty, 2236 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2237 ) 2238 2239 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2240 return self.expression( 2241 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2242 ) 2243 2244 if not self._next: 2245 return None 2246 2247 return self._parse_withisolatedloading() 2248 2249 def _parse_procedure_option(self) -> exp.Expression | None: 2250 if self._match_text_seq("EXECUTE", "AS"): 2251 return self.expression( 2252 exp.ExecuteAsProperty, 2253 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2254 or self._parse_string(), 2255 ) 2256 2257 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2258 2259 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2260 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2261 self._match(TokenType.EQ) 2262 2263 user = self._parse_id_var() 2264 self._match(TokenType.PARAMETER) 2265 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2266 2267 if not user or not host: 2268 return None 2269 2270 return exp.DefinerProperty(this=f"{user}@{host}") 2271 2272 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2273 self._match(TokenType.TABLE) 2274 self._match(TokenType.EQ) 2275 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2276 2277 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2278 return self.expression(exp.LogProperty, no=no) 2279 2280 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2281 return self.expression(exp.JournalProperty, **kwargs) 2282 2283 def _parse_checksum(self) -> exp.ChecksumProperty: 2284 self._match(TokenType.EQ) 2285 2286 on = None 2287 if self._match(TokenType.ON): 2288 on = True 2289 elif self._match_text_seq("OFF"): 2290 on = False 2291 2292 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2293 2294 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2295 return self.expression( 2296 exp.Cluster, 2297 expressions=( 2298 self._parse_wrapped_csv(self._parse_ordered) 2299 if wrapped 2300 else self._parse_csv(self._parse_ordered) 2301 ), 2302 ) 2303 2304 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2305 self._match_text_seq("BY") 2306 2307 self._match_l_paren() 2308 expressions = self._parse_csv(self._parse_column) 2309 self._match_r_paren() 2310 2311 if self._match_text_seq("SORTED", "BY"): 2312 self._match_l_paren() 2313 sorted_by = self._parse_csv(self._parse_ordered) 2314 self._match_r_paren() 2315 else: 2316 sorted_by = None 2317 2318 self._match(TokenType.INTO) 2319 buckets = self._parse_number() 2320 self._match_text_seq("BUCKETS") 2321 2322 return self.expression( 2323 exp.ClusteredByProperty, 2324 expressions=expressions, 2325 sorted_by=sorted_by, 2326 buckets=buckets, 2327 ) 2328 2329 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2330 if not self._match_text_seq("GRANTS"): 2331 self._retreat(self._index - 1) 2332 return None 2333 2334 return self.expression(exp.CopyGrantsProperty) 2335 2336 def _parse_freespace(self) -> exp.FreespaceProperty: 2337 self._match(TokenType.EQ) 2338 return self.expression( 2339 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2340 ) 2341 2342 def _parse_mergeblockratio( 2343 self, no: bool = False, default: bool = False 2344 ) -> exp.MergeBlockRatioProperty: 2345 if self._match(TokenType.EQ): 2346 return self.expression( 2347 exp.MergeBlockRatioProperty, 2348 this=self._parse_number(), 2349 percent=self._match(TokenType.PERCENT), 2350 ) 2351 2352 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2353 2354 def _parse_datablocksize( 2355 self, 2356 default: t.Optional[bool] = None, 2357 minimum: t.Optional[bool] = None, 2358 maximum: t.Optional[bool] = None, 2359 ) -> exp.DataBlocksizeProperty: 2360 self._match(TokenType.EQ) 2361 size = self._parse_number() 2362 2363 units = None 2364 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2365 units = self._prev.text 2366 2367 return self.expression( 2368 exp.DataBlocksizeProperty, 2369 size=size, 2370 units=units, 2371 default=default, 2372 minimum=minimum, 2373 maximum=maximum, 2374 ) 2375 2376 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2377 self._match(TokenType.EQ) 2378 always = self._match_text_seq("ALWAYS") 2379 manual = self._match_text_seq("MANUAL") 2380 never = self._match_text_seq("NEVER") 2381 default = self._match_text_seq("DEFAULT") 2382 2383 autotemp = None 2384 if self._match_text_seq("AUTOTEMP"): 2385 autotemp = self._parse_schema() 2386 2387 return self.expression( 2388 exp.BlockCompressionProperty, 2389 always=always, 2390 manual=manual, 2391 never=never, 2392 default=default, 2393 autotemp=autotemp, 2394 ) 2395 2396 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2397 index = self._index 2398 no = self._match_text_seq("NO") 2399 concurrent = self._match_text_seq("CONCURRENT") 2400 2401 if not self._match_text_seq("ISOLATED", "LOADING"): 2402 self._retreat(index) 2403 return None 2404 2405 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2406 return self.expression( 2407 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2408 ) 2409 2410 def _parse_locking(self) -> exp.LockingProperty: 2411 if self._match(TokenType.TABLE): 2412 kind = "TABLE" 2413 elif self._match(TokenType.VIEW): 2414 kind = "VIEW" 2415 elif self._match(TokenType.ROW): 2416 kind = "ROW" 2417 elif self._match_text_seq("DATABASE"): 2418 kind = "DATABASE" 2419 else: 2420 kind = None 2421 2422 if kind in ("DATABASE", "TABLE", "VIEW"): 2423 this = self._parse_table_parts() 2424 else: 2425 this = None 2426 2427 if self._match(TokenType.FOR): 2428 for_or_in = "FOR" 2429 elif self._match(TokenType.IN): 2430 for_or_in = "IN" 2431 else: 2432 for_or_in = None 2433 2434 if self._match_text_seq("ACCESS"): 2435 lock_type = "ACCESS" 2436 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2437 lock_type = "EXCLUSIVE" 2438 elif self._match_text_seq("SHARE"): 2439 lock_type = "SHARE" 2440 elif self._match_text_seq("READ"): 2441 lock_type = "READ" 2442 elif self._match_text_seq("WRITE"): 2443 lock_type = "WRITE" 2444 elif self._match_text_seq("CHECKSUM"): 2445 lock_type = "CHECKSUM" 2446 else: 2447 lock_type = None 2448 2449 override = self._match_text_seq("OVERRIDE") 2450 2451 return self.expression( 2452 exp.LockingProperty, 2453 this=this, 2454 kind=kind, 2455 for_or_in=for_or_in, 2456 lock_type=lock_type, 2457 override=override, 2458 ) 2459 2460 def _parse_partition_by(self) -> t.List[exp.Expression]: 2461 if self._match(TokenType.PARTITION_BY): 2462 return self._parse_csv(self._parse_assignment) 2463 return [] 2464 2465 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2466 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2467 if self._match_text_seq("MINVALUE"): 2468 return exp.var("MINVALUE") 2469 if self._match_text_seq("MAXVALUE"): 2470 return exp.var("MAXVALUE") 2471 return self._parse_bitwise() 2472 2473 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2474 expression = None 
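        # The branches below cover the three Postgres partition bound forms:
        # FOR VALUES IN (...) for list partitions, FOR VALUES FROM (...) TO (...)
        # for range partitions, and FOR VALUES WITH (MODULUS m, REMAINDER r) for
        # hash partitions.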
2475 from_expressions = None 2476 to_expressions = None 2477 2478 if self._match(TokenType.IN): 2479 this = self._parse_wrapped_csv(self._parse_bitwise) 2480 elif self._match(TokenType.FROM): 2481 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2482 self._match_text_seq("TO") 2483 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2484 elif self._match_text_seq("WITH", "(", "MODULUS"): 2485 this = self._parse_number() 2486 self._match_text_seq(",", "REMAINDER") 2487 expression = self._parse_number() 2488 self._match_r_paren() 2489 else: 2490 self.raise_error("Failed to parse partition bound spec.") 2491 2492 return self.expression( 2493 exp.PartitionBoundSpec, 2494 this=this, 2495 expression=expression, 2496 from_expressions=from_expressions, 2497 to_expressions=to_expressions, 2498 ) 2499 2500 # https://www.postgresql.org/docs/current/sql-createtable.html 2501 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2502 if not self._match_text_seq("OF"): 2503 self._retreat(self._index - 1) 2504 return None 2505 2506 this = self._parse_table(schema=True) 2507 2508 if self._match(TokenType.DEFAULT): 2509 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2510 elif self._match_text_seq("FOR", "VALUES"): 2511 expression = self._parse_partition_bound_spec() 2512 else: 2513 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2514 2515 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2516 2517 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2518 self._match(TokenType.EQ) 2519 return self.expression( 2520 exp.PartitionedByProperty, 2521 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2522 ) 2523 2524 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2525 if self._match_text_seq("AND", "STATISTICS"): 2526 statistics = True 2527 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2528 statistics = False 2529 else: 2530 statistics = None 2531 2532 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2533 2534 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2535 if self._match_text_seq("SQL"): 2536 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2537 return None 2538 2539 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2540 if self._match_text_seq("SQL", "DATA"): 2541 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2542 return None 2543 2544 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2545 if self._match_text_seq("PRIMARY", "INDEX"): 2546 return exp.NoPrimaryIndexProperty() 2547 if self._match_text_seq("SQL"): 2548 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2549 return None 2550 2551 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2552 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2553 return exp.OnCommitProperty() 2554 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2555 return exp.OnCommitProperty(delete=True) 2556 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2557 2558 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2559 if self._match_text_seq("SQL", "DATA"): 2560 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2561 return None 2562 2563 def _parse_distkey(self) -> exp.DistKeyProperty: 2564 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2565 2566 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2567 table = self._parse_table(schema=True) 2568 2569 options = [] 2570 while self._match_texts(("INCLUDING", "EXCLUDING")): 2571 this = self._prev.text.upper() 2572 2573 id_var = self._parse_id_var() 2574 if not id_var: 2575 return None 2576 2577 options.append( 2578 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2579 ) 2580 2581 return self.expression(exp.LikeProperty, this=table, expressions=options) 2582 2583 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2584 return self.expression( 2585 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2586 ) 2587 2588 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2589 self._match(TokenType.EQ) 2590 return self.expression( 2591 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2592 ) 2593 2594 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2595 self._match_text_seq("WITH", "CONNECTION") 2596 return self.expression( 2597 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2598 ) 2599 2600 def _parse_returns(self) -> exp.ReturnsProperty: 2601 value: t.Optional[exp.Expression] 2602 null = None 2603 is_table = self._match(TokenType.TABLE) 2604 2605 if is_table: 2606 if self._match(TokenType.LT): 2607 value = self.expression( 2608 exp.Schema, 2609 this="TABLE", 2610 expressions=self._parse_csv(self._parse_struct_types), 2611 ) 2612 if not self._match(TokenType.GT): 2613 self.raise_error("Expecting >") 2614 else: 2615 value = self._parse_schema(exp.var("TABLE")) 2616 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2617 null = True 2618 value = None 2619 else: 2620 value = self._parse_types() 2621 2622 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2623 2624 def _parse_describe(self) -> exp.Describe: 2625 kind = self._match_set(self.CREATABLES) and self._prev.text 2626 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2627 if self._match(TokenType.DOT): 2628 style = None 2629 self._retreat(self._index - 2) 2630 2631 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2632 2633 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2634 this = self._parse_statement() 2635 else: 2636 this = self._parse_table(schema=True) 2637 2638 properties = self._parse_properties() 2639 expressions = properties.expressions if properties else None 2640 partition = self._parse_partition() 2641 return self.expression( 2642 exp.Describe, 2643 this=this, 2644 style=style, 2645 kind=kind, 2646 expressions=expressions, 2647 partition=partition, 2648 format=format, 2649 ) 2650 2651 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2652 kind = self._prev.text.upper() 2653 expressions = [] 2654 2655 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2656 if self._match(TokenType.WHEN): 2657 expression = self._parse_disjunction() 2658 self._match(TokenType.THEN) 2659 else: 2660 expression = None 2661 2662 else_ = self._match(TokenType.ELSE) 2663 2664 if not self._match(TokenType.INTO): 2665 return None 2666 2667 return self.expression( 2668 exp.ConditionalInsert, 2669 this=self.expression( 2670 exp.Insert, 2671 this=self._parse_table(schema=True), 2672 
expression=self._parse_derived_table_values(), 2673 ), 2674 expression=expression, 2675 else_=else_, 2676 ) 2677 2678 expression = parse_conditional_insert() 2679 while expression is not None: 2680 expressions.append(expression) 2681 expression = parse_conditional_insert() 2682 2683 return self.expression( 2684 exp.MultitableInserts, 2685 kind=kind, 2686 comments=comments, 2687 expressions=expressions, 2688 source=self._parse_table(), 2689 ) 2690 2691 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2692 comments = [] 2693 hint = self._parse_hint() 2694 overwrite = self._match(TokenType.OVERWRITE) 2695 ignore = self._match(TokenType.IGNORE) 2696 local = self._match_text_seq("LOCAL") 2697 alternative = None 2698 is_function = None 2699 2700 if self._match_text_seq("DIRECTORY"): 2701 this: t.Optional[exp.Expression] = self.expression( 2702 exp.Directory, 2703 this=self._parse_var_or_string(), 2704 local=local, 2705 row_format=self._parse_row_format(match_row=True), 2706 ) 2707 else: 2708 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2709 comments += ensure_list(self._prev_comments) 2710 return self._parse_multitable_inserts(comments) 2711 2712 if self._match(TokenType.OR): 2713 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2714 2715 self._match(TokenType.INTO) 2716 comments += ensure_list(self._prev_comments) 2717 self._match(TokenType.TABLE) 2718 is_function = self._match(TokenType.FUNCTION) 2719 2720 this = ( 2721 self._parse_table(schema=True, parse_partition=True) 2722 if not is_function 2723 else self._parse_function() 2724 ) 2725 2726 returning = self._parse_returning() 2727 2728 return self.expression( 2729 exp.Insert, 2730 comments=comments, 2731 hint=hint, 2732 is_function=is_function, 2733 this=this, 2734 stored=self._match_text_seq("STORED") and self._parse_stored(), 2735 by_name=self._match_text_seq("BY", "NAME"), 2736 exists=self._parse_exists(), 2737 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2738 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2739 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2740 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2741 conflict=self._parse_on_conflict(), 2742 returning=returning or self._parse_returning(), 2743 overwrite=overwrite, 2744 alternative=alternative, 2745 ignore=ignore, 2746 source=self._match(TokenType.TABLE) and self._parse_table(), 2747 ) 2748 2749 def _parse_kill(self) -> exp.Kill: 2750 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2751 2752 return self.expression( 2753 exp.Kill, 2754 this=self._parse_primary(), 2755 kind=kind, 2756 ) 2757 2758 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2759 conflict = self._match_text_seq("ON", "CONFLICT") 2760 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2761 2762 if not conflict and not duplicate: 2763 return None 2764 2765 conflict_keys = None 2766 constraint = None 2767 2768 if conflict: 2769 if self._match_text_seq("ON", "CONSTRAINT"): 2770 constraint = self._parse_id_var() 2771 elif self._match(TokenType.L_PAREN): 2772 conflict_keys = self._parse_csv(self._parse_id_var) 2773 self._match_r_paren() 2774 2775 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2776 if self._prev.token_type == TokenType.UPDATE: 2777 self._match(TokenType.SET) 2778 expressions = self._parse_csv(self._parse_equality) 2779 else: 2780 

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
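
    # Editor's illustration (not part of the original source): the parsed conflict
    # clause lands in the Insert's "conflict" arg as an exp.OnConflict.
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one(
    #     ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
    #     ... )
    #     >>> type(node.args["conflict"]).__name__
    #     'OnConflict'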

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
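
    # Editor's illustration (not part of the original source): _parse_update stores the
    # SET assignments under "expressions" and the filter under "where".
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("UPDATE t SET a = 1 WHERE b = 2")
    #     >>> type(node).__name__, type(node.args["where"]).__name__
    #     ('Update', 'Where')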

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM clause, e.g. FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
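
    # Editor's illustration (not part of the original source): _parse_select drives most
    # queries; DISTINCT is recorded as an exp.Distinct node under the "distinct" arg.
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("SELECT DISTINCT a FROM t")
    #     >>> type(node.args["distinct"]).__name__
    #     'Distinct'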

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )
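
    # Editor's illustration (not part of the original source): _parse_with/_parse_cte
    # attach the CTE list to the statement that follows it via its "with" arg.
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #     >>> [cte.alias for cte in node.args["with"].expressions]
    #     ['c']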

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
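
    # Editor's illustration (not part of the original source): _parse_join keeps the
    # method/side/kind as plain strings and the ON condition as a parsed expression.
    #
    #     >>> import sqlglot
    #     >>> join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").args["joins"][0]
    #     >>> join.args["side"], type(join.args["on"]).__name__
    #     ('LEFT', 'EQ')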

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
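
    # Editor's illustration (not part of the original source): _parse_table_parts splits
    # dotted names into catalog/db/table.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> table = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #     >>> table.catalog, table.db, table.name
    #     ('c', 'd', 't')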

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
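
    # Editor's illustration (not part of the original source): a parenthesized VALUES in
    # FROM position goes through _parse_derived_table_values and keeps its table alias.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> values = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS v(x)").find(exp.Values)
    #     >>> values.alias
    #     'v'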

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]
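
    # Editor's illustration (not part of the original source): a PIVOT clause becomes an
    # exp.Pivot attached to the table it follows; its "field" arg is the parsed IN list
    # (Snowflake syntax shown).
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))"
    #     >>> pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
    #     >>> type(pivot.args["field"]).__name__
    #     'In'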

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
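
    # Editor's illustration (not part of the original source): GROUP BY and HAVING end up
    # in the Select's "group" and "having" args.
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("SELECT a, COUNT(*) FROM t GROUP BY a HAVING COUNT(*) > 1")
    #     >>> type(node.args["group"]).__name__, type(node.args["having"]).__name__
    #     ('Group', 'Having')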
exp.var("ALL") 4267 4268 asc = self._match(TokenType.ASC) 4269 desc = self._match(TokenType.DESC) or (asc and False) 4270 4271 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4272 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4273 4274 nulls_first = is_nulls_first or False 4275 explicitly_null_ordered = is_nulls_first or is_nulls_last 4276 4277 if ( 4278 not explicitly_null_ordered 4279 and ( 4280 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4281 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4282 ) 4283 and self.dialect.NULL_ORDERING != "nulls_are_last" 4284 ): 4285 nulls_first = True 4286 4287 if self._match_text_seq("WITH", "FILL"): 4288 with_fill = self.expression( 4289 exp.WithFill, 4290 **{ # type: ignore 4291 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4292 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4293 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4294 "interpolate": self._parse_interpolate(), 4295 }, 4296 ) 4297 else: 4298 with_fill = None 4299 4300 return self.expression( 4301 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4302 ) 4303 4304 def _parse_limit( 4305 self, 4306 this: t.Optional[exp.Expression] = None, 4307 top: bool = False, 4308 skip_limit_token: bool = False, 4309 ) -> t.Optional[exp.Expression]: 4310 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4311 comments = self._prev_comments 4312 if top: 4313 limit_paren = self._match(TokenType.L_PAREN) 4314 expression = self._parse_term() if limit_paren else self._parse_number() 4315 4316 if limit_paren: 4317 self._match_r_paren() 4318 else: 4319 expression = self._parse_term() 4320 4321 if self._match(TokenType.COMMA): 4322 offset = expression 4323 expression = self._parse_term() 4324 else: 4325 offset = None 4326 4327 limit_exp = self.expression( 4328 exp.Limit, 4329 this=this, 4330 expression=expression, 4331 offset=offset, 4332 comments=comments, 4333 expressions=self._parse_limit_by(), 4334 ) 4335 4336 return limit_exp 4337 4338 if self._match(TokenType.FETCH): 4339 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4340 direction = self._prev.text.upper() if direction else "FIRST" 4341 4342 count = self._parse_field(tokens=self.FETCH_TOKENS) 4343 percent = self._match(TokenType.PERCENT) 4344 4345 self._match_set((TokenType.ROW, TokenType.ROWS)) 4346 4347 only = self._match_text_seq("ONLY") 4348 with_ties = self._match_text_seq("WITH", "TIES") 4349 4350 if only and with_ties: 4351 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4352 4353 return self.expression( 4354 exp.Fetch, 4355 direction=direction, 4356 count=count, 4357 percent=percent, 4358 with_ties=with_ties, 4359 ) 4360 4361 return this 4362 4363 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4364 if not self._match(TokenType.OFFSET): 4365 return this 4366 4367 count = self._parse_term() 4368 self._match_set((TokenType.ROW, TokenType.ROWS)) 4369 4370 return self.expression( 4371 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4372 ) 4373 4374 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4375 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4376 4377 def _parse_locks(self) -> t.List[exp.Lock]: 4378 locks = [] 4379 while True: 4380 if self._match_text_seq("FOR", "UPDATE"): 4381 update = True 4382 elif self._match_text_seq("FOR", "SHARE") 

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
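
    # Editor's illustration (not part of the original source): set operations wrap their
    # operands, and the implicit distinctness is recorded explicitly on the node.
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    #     >>> type(node).__name__, node.args["distinct"]
    #     ('Union', True)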

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)
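
    # Editor's illustration (not part of the original source): _parse_range/_parse_is
    # build Is/Between/In nodes, with NOT wrapping the result.
    #
    #     >>> import sqlglot
    #     >>> proj = sqlglot.parse_one("SELECT a IS NOT NULL FROM t").expressions[0]
    #     >>> type(proj).__name__, type(proj.this).__name__
    #     ('Not', 'Is')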
expression=self._parse_string()) 4585 4586 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4587 index = self._index 4588 4589 if not self._match(TokenType.INTERVAL) and match_interval: 4590 return None 4591 4592 if self._match(TokenType.STRING, advance=False): 4593 this = self._parse_primary() 4594 else: 4595 this = self._parse_term() 4596 4597 if not this or ( 4598 isinstance(this, exp.Column) 4599 and not this.table 4600 and not this.this.quoted 4601 and this.name.upper() == "IS" 4602 ): 4603 self._retreat(index) 4604 return None 4605 4606 unit = self._parse_function() or ( 4607 not self._match(TokenType.ALIAS, advance=False) 4608 and self._parse_var(any_token=True, upper=True) 4609 ) 4610 4611 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4612 # each INTERVAL expression into this canonical form so it's easy to transpile 4613 if this and this.is_number: 4614 this = exp.Literal.string(this.to_py()) 4615 elif this and this.is_string: 4616 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4617 if len(parts) == 1: 4618 if unit: 4619 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4620 self._retreat(self._index - 1) 4621 4622 this = exp.Literal.string(parts[0][0]) 4623 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4624 4625 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4626 unit = self.expression( 4627 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4628 ) 4629 4630 interval = self.expression(exp.Interval, this=this, unit=unit) 4631 4632 index = self._index 4633 self._match(TokenType.PLUS) 4634 4635 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4636 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4637 return self.expression( 4638 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4639 ) 4640 4641 self._retreat(index) 4642 return interval 4643 4644 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4645 this = self._parse_term() 4646 4647 while True: 4648 if self._match_set(self.BITWISE): 4649 this = self.expression( 4650 self.BITWISE[self._prev.token_type], 4651 this=this, 4652 expression=self._parse_term(), 4653 ) 4654 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4655 this = self.expression( 4656 exp.DPipe, 4657 this=this, 4658 expression=self._parse_term(), 4659 safe=not self.dialect.STRICT_STRING_CONCAT, 4660 ) 4661 elif self._match(TokenType.DQMARK): 4662 this = self.expression( 4663 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4664 ) 4665 elif self._match_pair(TokenType.LT, TokenType.LT): 4666 this = self.expression( 4667 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4668 ) 4669 elif self._match_pair(TokenType.GT, TokenType.GT): 4670 this = self.expression( 4671 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4672 ) 4673 else: 4674 break 4675 4676 return this 4677 4678 def _parse_term(self) -> t.Optional[exp.Expression]: 4679 this = self._parse_factor() 4680 4681 while self._match_set(self.TERM): 4682 klass = self.TERM[self._prev.token_type] 4683 comments = self._prev_comments 4684 expression = self._parse_factor() 4685 4686 this = self.expression(klass, this=this, comments=comments, expression=expression) 4687 4688 if isinstance(this, exp.Collate): 4689 expr = this.expression 4690 4691 # Preserve collations such as pg_catalog."default" 
(Postgres) as columns, otherwise 4692 # fall back to Identifier / Var 4693 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4694 ident = expr.this 4695 if isinstance(ident, exp.Identifier): 4696 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4697 4698 return this 4699 4700 def _parse_factor(self) -> t.Optional[exp.Expression]: 4701 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4702 this = parse_method() 4703 4704 while self._match_set(self.FACTOR): 4705 klass = self.FACTOR[self._prev.token_type] 4706 comments = self._prev_comments 4707 expression = parse_method() 4708 4709 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4710 self._retreat(self._index - 1) 4711 return this 4712 4713 this = self.expression(klass, this=this, comments=comments, expression=expression) 4714 4715 if isinstance(this, exp.Div): 4716 this.args["typed"] = self.dialect.TYPED_DIVISION 4717 this.args["safe"] = self.dialect.SAFE_DIVISION 4718 4719 return this 4720 4721 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4722 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4723 4724 def _parse_unary(self) -> t.Optional[exp.Expression]: 4725 if self._match_set(self.UNARY_PARSERS): 4726 return self.UNARY_PARSERS[self._prev.token_type](self) 4727 return self._parse_at_time_zone(self._parse_type()) 4728 4729 def _parse_type( 4730 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4731 ) -> t.Optional[exp.Expression]: 4732 interval = parse_interval and self._parse_interval() 4733 if interval: 4734 return interval 4735 4736 index = self._index 4737 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4738 4739 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4740 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4741 if isinstance(data_type, exp.Cast): 4742 # This constructor can contain ops directly after it, for instance struct unnesting: 4743 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4744 return self._parse_column_ops(data_type) 4745 4746 if data_type: 4747 index2 = self._index 4748 this = self._parse_primary() 4749 4750 if isinstance(this, exp.Literal): 4751 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4752 if parser: 4753 return parser(self, this, data_type) 4754 4755 return self.expression(exp.Cast, this=this, to=data_type) 4756 4757 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4758 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4759 # 4760 # If the index difference here is greater than 1, that means the parser itself must have 4761 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4762 # 4763 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4764 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4765 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4766 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4767 # 4768 # In these cases, we don't really want to return the converted type, but instead retreat 4769 # and try to parse a Column or Identifier in the section below.
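            # Illustrative trace of the index bookkeeping above: for the input
            # DECIMAL(38, 0), _parse_types consumes the six tokens DECIMAL ( 38 , 0 ),
            # so index2 - index == 6 and the guard below keeps the parsed DataType.
            # If a TYPE_CONVERTERS callable injected the precision/scale instead, only
            # the DECIMAL keyword itself was consumed (index2 - index == 1), the guard
            # fails, and we retreat to parse a Column / Identifier further down.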
4770 if data_type.expressions and index2 - index > 1: 4771 self._retreat(index2) 4772 return self._parse_column_ops(data_type) 4773 4774 self._retreat(index) 4775 4776 if fallback_to_identifier: 4777 return self._parse_id_var() 4778 4779 this = self._parse_column() 4780 return this and self._parse_column_ops(this) 4781 4782 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4783 this = self._parse_type() 4784 if not this: 4785 return None 4786 4787 if isinstance(this, exp.Column) and not this.table: 4788 this = exp.var(this.name.upper()) 4789 4790 return self.expression( 4791 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4792 ) 4793 4794 def _parse_types( 4795 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4796 ) -> t.Optional[exp.Expression]: 4797 index = self._index 4798 4799 this: t.Optional[exp.Expression] = None 4800 prefix = self._match_text_seq("SYSUDTLIB", ".") 4801 4802 if not self._match_set(self.TYPE_TOKENS): 4803 identifier = allow_identifiers and self._parse_id_var( 4804 any_token=False, tokens=(TokenType.VAR,) 4805 ) 4806 if isinstance(identifier, exp.Identifier): 4807 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4808 4809 if len(tokens) != 1: 4810 self.raise_error("Unexpected identifier", self._prev) 4811 4812 if tokens[0].token_type in self.TYPE_TOKENS: 4813 self._prev = tokens[0] 4814 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4815 type_name = identifier.name 4816 4817 while self._match(TokenType.DOT): 4818 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4819 4820 this = exp.DataType.build(type_name, udt=True) 4821 else: 4822 self._retreat(self._index - 1) 4823 return None 4824 else: 4825 return None 4826 4827 type_token = self._prev.token_type 4828 4829 if type_token == TokenType.PSEUDO_TYPE: 4830 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4831 4832 if type_token == TokenType.OBJECT_IDENTIFIER: 4833 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4834 4835 # https://materialize.com/docs/sql/types/map/ 4836 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4837 key_type = self._parse_types( 4838 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4839 ) 4840 if not self._match(TokenType.FARROW): 4841 self._retreat(index) 4842 return None 4843 4844 value_type = self._parse_types( 4845 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4846 ) 4847 if not self._match(TokenType.R_BRACKET): 4848 self._retreat(index) 4849 return None 4850 4851 return exp.DataType( 4852 this=exp.DataType.Type.MAP, 4853 expressions=[key_type, value_type], 4854 nested=True, 4855 prefix=prefix, 4856 ) 4857 4858 nested = type_token in self.NESTED_TYPE_TOKENS 4859 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4860 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4861 expressions = None 4862 maybe_func = False 4863 4864 if self._match(TokenType.L_PAREN): 4865 if is_struct: 4866 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4867 elif nested: 4868 expressions = self._parse_csv( 4869 lambda: self._parse_types( 4870 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4871 ) 4872 ) 4873 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4874 this = expressions[0] 4875 this.set("nullable", True) 4876 self._match_r_paren() 4877 return this 4878 elif type_token in self.ENUM_TYPE_TOKENS: 4879 
expressions = self._parse_csv(self._parse_equality) 4880 elif is_aggregate: 4881 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4882 any_token=False, tokens=(TokenType.VAR,) 4883 ) 4884 if not func_or_ident or not self._match(TokenType.COMMA): 4885 return None 4886 expressions = self._parse_csv( 4887 lambda: self._parse_types( 4888 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4889 ) 4890 ) 4891 expressions.insert(0, func_or_ident) 4892 else: 4893 expressions = self._parse_csv(self._parse_type_size) 4894 4895 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4896 if type_token == TokenType.VECTOR and len(expressions) == 2: 4897 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4898 4899 if not expressions or not self._match(TokenType.R_PAREN): 4900 self._retreat(index) 4901 return None 4902 4903 maybe_func = True 4904 4905 values: t.Optional[t.List[exp.Expression]] = None 4906 4907 if nested and self._match(TokenType.LT): 4908 if is_struct: 4909 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4910 else: 4911 expressions = self._parse_csv( 4912 lambda: self._parse_types( 4913 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4914 ) 4915 ) 4916 4917 if not self._match(TokenType.GT): 4918 self.raise_error("Expecting >") 4919 4920 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4921 values = self._parse_csv(self._parse_assignment) 4922 if not values and is_struct: 4923 values = None 4924 self._retreat(self._index - 1) 4925 else: 4926 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4927 4928 if type_token in self.TIMESTAMPS: 4929 if self._match_text_seq("WITH", "TIME", "ZONE"): 4930 maybe_func = False 4931 tz_type = ( 4932 exp.DataType.Type.TIMETZ 4933 if type_token in self.TIMES 4934 else exp.DataType.Type.TIMESTAMPTZ 4935 ) 4936 this = exp.DataType(this=tz_type, expressions=expressions) 4937 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4938 maybe_func = False 4939 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4940 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4941 maybe_func = False 4942 elif type_token == TokenType.INTERVAL: 4943 unit = self._parse_var(upper=True) 4944 if unit: 4945 if self._match_text_seq("TO"): 4946 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4947 4948 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4949 else: 4950 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4951 4952 if maybe_func and check_func: 4953 index2 = self._index 4954 peek = self._parse_string() 4955 4956 if not peek: 4957 self._retreat(index) 4958 return None 4959 4960 self._retreat(index2) 4961 4962 if not this: 4963 if self._match_text_seq("UNSIGNED"): 4964 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4965 if not unsigned_type_token: 4966 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4967 4968 type_token = unsigned_type_token or type_token 4969 4970 this = exp.DataType( 4971 this=exp.DataType.Type[type_token.value], 4972 expressions=expressions, 4973 nested=nested, 4974 prefix=prefix, 4975 ) 4976 4977 # Empty arrays/structs are allowed 4978 if values is not None: 4979 cls = exp.Struct if is_struct else exp.Array 4980 this = exp.cast(cls(expressions=values), this, copy=False) 4981 4982 elif expressions: 4983 this.set("expressions", 
expressions) 4984 4985 # https://materialize.com/docs/sql/types/list/#type-name 4986 while self._match(TokenType.LIST): 4987 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4988 4989 index = self._index 4990 4991 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4992 matched_array = self._match(TokenType.ARRAY) 4993 4994 while self._curr: 4995 datatype_token = self._prev.token_type 4996 matched_l_bracket = self._match(TokenType.L_BRACKET) 4997 if not matched_l_bracket and not matched_array: 4998 break 4999 5000 matched_array = False 5001 values = self._parse_csv(self._parse_assignment) or None 5002 if ( 5003 values 5004 and not schema 5005 and ( 5006 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5007 ) 5008 ): 5009 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5010 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5011 self._retreat(index) 5012 break 5013 5014 this = exp.DataType( 5015 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5016 ) 5017 self._match(TokenType.R_BRACKET) 5018 5019 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5020 converter = self.TYPE_CONVERTERS.get(this.this) 5021 if converter: 5022 this = converter(t.cast(exp.DataType, this)) 5023 5024 return this 5025 5026 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5027 index = self._index 5028 5029 if ( 5030 self._curr 5031 and self._next 5032 and self._curr.token_type in self.TYPE_TOKENS 5033 and self._next.token_type in self.TYPE_TOKENS 5034 ): 5035 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5036 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5037 this = self._parse_id_var() 5038 else: 5039 this = ( 5040 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5041 or self._parse_id_var() 5042 ) 5043 5044 self._match(TokenType.COLON) 5045 5046 if ( 5047 type_required 5048 and not isinstance(this, exp.DataType) 5049 and not self._match_set(self.TYPE_TOKENS, advance=False) 5050 ): 5051 self._retreat(index) 5052 return self._parse_types() 5053 5054 return self._parse_column_def(this) 5055 5056 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5057 if not self._match_text_seq("AT", "TIME", "ZONE"): 5058 return this 5059 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5060 5061 def _parse_column(self) -> t.Optional[exp.Expression]: 5062 this = self._parse_column_reference() 5063 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5064 5065 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5066 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5067 5068 return column 5069 5070 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5071 this = self._parse_field() 5072 if ( 5073 not this 5074 and self._match(TokenType.VALUES, advance=False) 5075 and self.VALUES_FOLLOWED_BY_PAREN 5076 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5077 ): 5078 this = self._parse_id_var() 5079 5080 if isinstance(this, exp.Identifier): 5081 # We bubble up comments from the Identifier to the Column 5082 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5083 5084 return this 5085 5086 def _parse_colon_as_variant_extract( 5087 self, this: t.Optional[exp.Expression] 5088 ) -> t.Optional[exp.Expression]: 5089 casts = [] 5090 json_path = [] 5091 escape = None 5092 5093 while self._match(TokenType.COLON): 5094 start_index = self._index 5095 5096 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5097 path = self._parse_column_ops( 5098 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5099 ) 5100 5101 # The cast :: operator has a lower precedence than the extraction operator :, so 5102 # we rearrange the AST appropriately to avoid casting the JSON path 5103 while isinstance(path, exp.Cast): 5104 casts.append(path.to) 5105 path = path.this 5106 5107 if casts: 5108 dcolon_offset = next( 5109 i 5110 for i, t in enumerate(self._tokens[start_index:]) 5111 if t.token_type == TokenType.DCOLON 5112 ) 5113 end_token = self._tokens[start_index + dcolon_offset - 1] 5114 else: 5115 end_token = self._prev 5116 5117 if path: 5118 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5119 # it'll roundtrip to a string literal in GET_PATH 5120 if isinstance(path, exp.Identifier) and path.quoted: 5121 escape = True 5122 5123 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5124 5125 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5126 # Databricks transforms it back to the colon/dot notation 5127 if json_path: 5128 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5129 5130 if json_path_expr: 5131 json_path_expr.set("escape", escape) 5132 5133 this = self.expression( 5134 exp.JSONExtract, 5135 this=this, 5136 expression=json_path_expr, 5137 variant_extract=True, 5138 ) 5139 5140 while casts: 5141 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5142 5143 return this 5144 5145 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5146 return self._parse_types() 5147 5148 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5149 this = self._parse_bracket(this) 5150 5151 while self._match_set(self.COLUMN_OPERATORS): 5152 op_token = self._prev.token_type 5153 op = self.COLUMN_OPERATORS.get(op_token) 5154 5155 if op_token == TokenType.DCOLON: 5156 field = self._parse_dcolon() 5157 if not field: 5158 self.raise_error("Expected type") 5159 elif op and self._curr: 5160 field = self._parse_column_reference() or self._parse_bracket() 5161 else: 5162 field = self._parse_field(any_token=True, anonymous_func=True) 5163 5164 if isinstance(field, (exp.Func, exp.Window)) and this: 5165 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5166 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5167 this = exp.replace_tree( 5168 this, 5169 lambda n: ( 5170 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5171 if n.table 5172 else n.this 5173 ) 5174 if isinstance(n, exp.Column) 5175 else n, 5176 ) 5177 5178 if op: 5179 this = op(self, this, field) 5180 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5181 this = self.expression( 5182 exp.Column, 5183 comments=this.comments, 5184 this=field, 5185 table=this.this, 5186 db=this.args.get("table"), 5187 catalog=this.args.get("db"), 5188 ) 5189 elif isinstance(field, exp.Window): 5190 # Move the exp.Dot's to the window's function 5191 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5192 field.set("this", window_func) 5193 this = field 5194 else: 5195 this = self.expression(exp.Dot, this=this, expression=field) 5196 5197 if field and field.comments: 5198 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5199 5200 this = self._parse_bracket(this) 5201 5202 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5203 5204 def _parse_primary(self) -> t.Optional[exp.Expression]: 5205 if self._match_set(self.PRIMARY_PARSERS): 5206 token_type = self._prev.token_type 5207 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5208 5209 if token_type == TokenType.STRING: 5210 expressions = [primary] 5211 while self._match(TokenType.STRING): 5212 expressions.append(exp.Literal.string(self._prev.text)) 5213 5214 if len(expressions) > 1: 5215 return self.expression(exp.Concat, expressions=expressions) 5216 5217 return primary 5218 5219 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5220 return exp.Literal.number(f"0.{self._prev.text}") 5221 5222 if 
self._match(TokenType.L_PAREN): 5223 comments = self._prev_comments 5224 query = self._parse_select() 5225 5226 if query: 5227 expressions = [query] 5228 else: 5229 expressions = self._parse_expressions() 5230 5231 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5232 5233 if not this and self._match(TokenType.R_PAREN, advance=False): 5234 this = self.expression(exp.Tuple) 5235 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5236 this = self._parse_subquery(this=this, parse_alias=False) 5237 elif isinstance(this, exp.Subquery): 5238 this = self._parse_subquery( 5239 this=self._parse_set_operations(this), parse_alias=False 5240 ) 5241 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5242 this = self.expression(exp.Tuple, expressions=expressions) 5243 else: 5244 this = self.expression(exp.Paren, this=this) 5245 5246 if this: 5247 this.add_comments(comments) 5248 5249 self._match_r_paren(expression=this) 5250 return this 5251 5252 return None 5253 5254 def _parse_field( 5255 self, 5256 any_token: bool = False, 5257 tokens: t.Optional[t.Collection[TokenType]] = None, 5258 anonymous_func: bool = False, 5259 ) -> t.Optional[exp.Expression]: 5260 if anonymous_func: 5261 field = ( 5262 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5263 or self._parse_primary() 5264 ) 5265 else: 5266 field = self._parse_primary() or self._parse_function( 5267 anonymous=anonymous_func, any_token=any_token 5268 ) 5269 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5270 5271 def _parse_function( 5272 self, 5273 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5274 anonymous: bool = False, 5275 optional_parens: bool = True, 5276 any_token: bool = False, 5277 ) -> t.Optional[exp.Expression]: 5278 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5279 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5280 fn_syntax = False 5281 if ( 5282 self._match(TokenType.L_BRACE, advance=False) 5283 and self._next 5284 and self._next.text.upper() == "FN" 5285 ): 5286 self._advance(2) 5287 fn_syntax = True 5288 5289 func = self._parse_function_call( 5290 functions=functions, 5291 anonymous=anonymous, 5292 optional_parens=optional_parens, 5293 any_token=any_token, 5294 ) 5295 5296 if fn_syntax: 5297 self._match(TokenType.R_BRACE) 5298 5299 return func 5300 5301 def _parse_function_call( 5302 self, 5303 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5304 anonymous: bool = False, 5305 optional_parens: bool = True, 5306 any_token: bool = False, 5307 ) -> t.Optional[exp.Expression]: 5308 if not self._curr: 5309 return None 5310 5311 comments = self._curr.comments 5312 token_type = self._curr.token_type 5313 this = self._curr.text 5314 upper = this.upper() 5315 5316 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5317 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5318 self._advance() 5319 return self._parse_window(parser(self)) 5320 5321 if not self._next or self._next.token_type != TokenType.L_PAREN: 5322 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5323 self._advance() 5324 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5325 5326 return None 5327 5328 if any_token: 5329 if token_type in self.RESERVED_TOKENS: 5330 return None 5331 elif token_type not in self.FUNC_TOKENS: 5332 return None 5333 5334 self._advance(2) 5335 5336 parser = self.FUNCTION_PARSERS.get(upper) 5337 if parser and not anonymous: 5338 this = parser(self) 
5339 else: 5340 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5341 5342 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5343 this = self.expression( 5344 subquery_predicate, comments=comments, this=self._parse_select() 5345 ) 5346 self._match_r_paren() 5347 return this 5348 5349 if functions is None: 5350 functions = self.FUNCTIONS 5351 5352 function = functions.get(upper) 5353 5354 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5355 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5356 5357 if alias: 5358 args = self._kv_to_prop_eq(args) 5359 5360 if function and not anonymous: 5361 if "dialect" in function.__code__.co_varnames: 5362 func = function(args, dialect=self.dialect) 5363 else: 5364 func = function(args) 5365 5366 func = self.validate_expression(func, args) 5367 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5368 func.meta["name"] = this 5369 5370 this = func 5371 else: 5372 if token_type == TokenType.IDENTIFIER: 5373 this = exp.Identifier(this=this, quoted=True) 5374 this = self.expression(exp.Anonymous, this=this, expressions=args) 5375 5376 if isinstance(this, exp.Expression): 5377 this.add_comments(comments) 5378 5379 self._match_r_paren(this) 5380 return self._parse_window(this) 5381 5382 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5383 return expression 5384 5385 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5386 transformed = [] 5387 5388 for index, e in enumerate(expressions): 5389 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5390 if isinstance(e, exp.Alias): 5391 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5392 5393 if not isinstance(e, exp.PropertyEQ): 5394 e = self.expression( 5395 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5396 ) 5397 5398 if isinstance(e.this, exp.Column): 5399 e.this.replace(e.this.this) 5400 else: 5401 e = self._to_prop_eq(e, index) 5402 5403 transformed.append(e) 5404 5405 return transformed 5406 5407 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5408 return self._parse_statement() 5409 5410 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5411 return self._parse_column_def(self._parse_id_var()) 5412 5413 def _parse_user_defined_function( 5414 self, kind: t.Optional[TokenType] = None 5415 ) -> t.Optional[exp.Expression]: 5416 this = self._parse_id_var() 5417 5418 while self._match(TokenType.DOT): 5419 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5420 5421 if not self._match(TokenType.L_PAREN): 5422 return this 5423 5424 expressions = self._parse_csv(self._parse_function_parameter) 5425 self._match_r_paren() 5426 return self.expression( 5427 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5428 ) 5429 5430 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5431 literal = self._parse_primary() 5432 if literal: 5433 return self.expression(exp.Introducer, this=token.text, expression=literal) 5434 5435 return self.expression(exp.Identifier, this=token.text) 5436 5437 def _parse_session_parameter(self) -> exp.SessionParameter: 5438 kind = None 5439 this = self._parse_id_var() or self._parse_primary() 5440 5441 if this and self._match(TokenType.DOT): 5442 kind = this.name 5443 this = self._parse_var() or self._parse_primary() 5444 5445 return self.expression(exp.SessionParameter, this=this, kind=kind) 
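    # A minimal usage sketch for _parse_session_parameter (assuming the dialect, e.g.
    # MySQL, routes @@-prefixed names here via its tokenizer and PRIMARY_PARSERS):
    # for @@session.time_zone, kind becomes "session" and this a Var(time_zone), roughly
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("SELECT @@session.time_zone", read="mysql")
    #   >>> ast.find(exp.SessionParameter) is not None
    #   True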
5446 5447 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5448 return self._parse_id_var() 5449 5450 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5451 index = self._index 5452 5453 if self._match(TokenType.L_PAREN): 5454 expressions = t.cast( 5455 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5456 ) 5457 5458 if not self._match(TokenType.R_PAREN): 5459 self._retreat(index) 5460 else: 5461 expressions = [self._parse_lambda_arg()] 5462 5463 if self._match_set(self.LAMBDAS): 5464 return self.LAMBDAS[self._prev.token_type](self, expressions) 5465 5466 self._retreat(index) 5467 5468 this: t.Optional[exp.Expression] 5469 5470 if self._match(TokenType.DISTINCT): 5471 this = self.expression( 5472 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5473 ) 5474 else: 5475 this = self._parse_select_or_expression(alias=alias) 5476 5477 return self._parse_limit( 5478 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5479 ) 5480 5481 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5482 index = self._index 5483 if not self._match(TokenType.L_PAREN): 5484 return this 5485 5486 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5487 # expr can be of both types 5488 if self._match_set(self.SELECT_START_TOKENS): 5489 self._retreat(index) 5490 return this 5491 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5492 self._match_r_paren() 5493 return self.expression(exp.Schema, this=this, expressions=args) 5494 5495 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5496 return self._parse_column_def(self._parse_field(any_token=True)) 5497 5498 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5499 # column defs are not really columns, they're identifiers 5500 if isinstance(this, exp.Column): 5501 this = this.this 5502 5503 kind = self._parse_types(schema=True) 5504 5505 if self._match_text_seq("FOR", "ORDINALITY"): 5506 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5507 5508 constraints: t.List[exp.Expression] = [] 5509 5510 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5511 ("ALIAS", "MATERIALIZED") 5512 ): 5513 persisted = self._prev.text.upper() == "MATERIALIZED" 5514 constraint_kind = exp.ComputedColumnConstraint( 5515 this=self._parse_assignment(), 5516 persisted=persisted or self._match_text_seq("PERSISTED"), 5517 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5518 ) 5519 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5520 elif ( 5521 kind 5522 and self._match(TokenType.ALIAS, advance=False) 5523 and ( 5524 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5525 or (self._next and self._next.token_type == TokenType.L_PAREN) 5526 ) 5527 ): 5528 self._advance() 5529 constraints.append( 5530 self.expression( 5531 exp.ColumnConstraint, 5532 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5533 ) 5534 ) 5535 5536 while True: 5537 constraint = self._parse_column_constraint() 5538 if not constraint: 5539 break 5540 constraints.append(constraint) 5541 5542 if not kind and not constraints: 5543 return this 5544 5545 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5546 5547 def _parse_auto_increment( 5548 self, 5549 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 
5550 start = None 5551 increment = None 5552 5553 if self._match(TokenType.L_PAREN, advance=False): 5554 args = self._parse_wrapped_csv(self._parse_bitwise) 5555 start = seq_get(args, 0) 5556 increment = seq_get(args, 1) 5557 elif self._match_text_seq("START"): 5558 start = self._parse_bitwise() 5559 self._match_text_seq("INCREMENT") 5560 increment = self._parse_bitwise() 5561 5562 if start and increment: 5563 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5564 5565 return exp.AutoIncrementColumnConstraint() 5566 5567 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5568 if not self._match_text_seq("REFRESH"): 5569 self._retreat(self._index - 1) 5570 return None 5571 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5572 5573 def _parse_compress(self) -> exp.CompressColumnConstraint: 5574 if self._match(TokenType.L_PAREN, advance=False): 5575 return self.expression( 5576 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5577 ) 5578 5579 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5580 5581 def _parse_generated_as_identity( 5582 self, 5583 ) -> ( 5584 exp.GeneratedAsIdentityColumnConstraint 5585 | exp.ComputedColumnConstraint 5586 | exp.GeneratedAsRowColumnConstraint 5587 ): 5588 if self._match_text_seq("BY", "DEFAULT"): 5589 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5590 this = self.expression( 5591 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5592 ) 5593 else: 5594 self._match_text_seq("ALWAYS") 5595 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5596 5597 self._match(TokenType.ALIAS) 5598 5599 if self._match_text_seq("ROW"): 5600 start = self._match_text_seq("START") 5601 if not start: 5602 self._match(TokenType.END) 5603 hidden = self._match_text_seq("HIDDEN") 5604 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5605 5606 identity = self._match_text_seq("IDENTITY") 5607 5608 if self._match(TokenType.L_PAREN): 5609 if self._match(TokenType.START_WITH): 5610 this.set("start", self._parse_bitwise()) 5611 if self._match_text_seq("INCREMENT", "BY"): 5612 this.set("increment", self._parse_bitwise()) 5613 if self._match_text_seq("MINVALUE"): 5614 this.set("minvalue", self._parse_bitwise()) 5615 if self._match_text_seq("MAXVALUE"): 5616 this.set("maxvalue", self._parse_bitwise()) 5617 5618 if self._match_text_seq("CYCLE"): 5619 this.set("cycle", True) 5620 elif self._match_text_seq("NO", "CYCLE"): 5621 this.set("cycle", False) 5622 5623 if not identity: 5624 this.set("expression", self._parse_range()) 5625 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5626 args = self._parse_csv(self._parse_bitwise) 5627 this.set("start", seq_get(args, 0)) 5628 this.set("increment", seq_get(args, 1)) 5629 5630 self._match_r_paren() 5631 5632 return this 5633 5634 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5635 self._match_text_seq("LENGTH") 5636 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5637 5638 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5639 if self._match_text_seq("NULL"): 5640 return self.expression(exp.NotNullColumnConstraint) 5641 if self._match_text_seq("CASESPECIFIC"): 5642 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5643 if self._match_text_seq("FOR", "REPLICATION"): 5644 return 
self.expression(exp.NotForReplicationColumnConstraint) 5645 5646 # Unconsume the `NOT` token 5647 self._retreat(self._index - 1) 5648 return None 5649 5650 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5651 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5652 5653 procedure_option_follows = ( 5654 self._match(TokenType.WITH, advance=False) 5655 and self._next 5656 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5657 ) 5658 5659 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5660 return self.expression( 5661 exp.ColumnConstraint, 5662 this=this, 5663 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5664 ) 5665 5666 return this 5667 5668 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5669 if not self._match(TokenType.CONSTRAINT): 5670 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5671 5672 return self.expression( 5673 exp.Constraint, 5674 this=self._parse_id_var(), 5675 expressions=self._parse_unnamed_constraints(), 5676 ) 5677 5678 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5679 constraints = [] 5680 while True: 5681 constraint = self._parse_unnamed_constraint() or self._parse_function() 5682 if not constraint: 5683 break 5684 constraints.append(constraint) 5685 5686 return constraints 5687 5688 def _parse_unnamed_constraint( 5689 self, constraints: t.Optional[t.Collection[str]] = None 5690 ) -> t.Optional[exp.Expression]: 5691 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5692 constraints or self.CONSTRAINT_PARSERS 5693 ): 5694 return None 5695 5696 constraint = self._prev.text.upper() 5697 if constraint not in self.CONSTRAINT_PARSERS: 5698 self.raise_error(f"No parser found for schema constraint {constraint}.") 5699 5700 return self.CONSTRAINT_PARSERS[constraint](self) 5701 5702 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5703 return self._parse_id_var(any_token=False) 5704 5705 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5706 self._match_text_seq("KEY") 5707 return self.expression( 5708 exp.UniqueColumnConstraint, 5709 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5710 this=self._parse_schema(self._parse_unique_key()), 5711 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5712 on_conflict=self._parse_on_conflict(), 5713 ) 5714 5715 def _parse_key_constraint_options(self) -> t.List[str]: 5716 options = [] 5717 while True: 5718 if not self._curr: 5719 break 5720 5721 if self._match(TokenType.ON): 5722 action = None 5723 on = self._advance_any() and self._prev.text 5724 5725 if self._match_text_seq("NO", "ACTION"): 5726 action = "NO ACTION" 5727 elif self._match_text_seq("CASCADE"): 5728 action = "CASCADE" 5729 elif self._match_text_seq("RESTRICT"): 5730 action = "RESTRICT" 5731 elif self._match_pair(TokenType.SET, TokenType.NULL): 5732 action = "SET NULL" 5733 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5734 action = "SET DEFAULT" 5735 else: 5736 self.raise_error("Invalid key constraint") 5737 5738 options.append(f"ON {on} {action}") 5739 else: 5740 var = self._parse_var_from_options( 5741 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5742 ) 5743 if not var: 5744 break 5745 options.append(var.name) 5746 5747 return options 5748 5749 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5750 if match and not self._match(TokenType.REFERENCES): 5751 return None 5752 5753 expressions = None 
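        # Note that expressions is left as None here; a referenced column list such as
        # REFERENCES t(a, b) is captured by _parse_table(schema=True) below as part of
        # the Schema node wrapping the table, rather than in this arg.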
5754 this = self._parse_table(schema=True) 5755 options = self._parse_key_constraint_options() 5756 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5757 5758 def _parse_foreign_key(self) -> exp.ForeignKey: 5759 expressions = self._parse_wrapped_id_vars() 5760 reference = self._parse_references() 5761 options = {} 5762 5763 while self._match(TokenType.ON): 5764 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5765 self.raise_error("Expected DELETE or UPDATE") 5766 5767 kind = self._prev.text.lower() 5768 5769 if self._match_text_seq("NO", "ACTION"): 5770 action = "NO ACTION" 5771 elif self._match(TokenType.SET): 5772 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5773 action = "SET " + self._prev.text.upper() 5774 else: 5775 self._advance() 5776 action = self._prev.text.upper() 5777 5778 options[kind] = action 5779 5780 return self.expression( 5781 exp.ForeignKey, 5782 expressions=expressions, 5783 reference=reference, 5784 **options, # type: ignore 5785 ) 5786 5787 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5788 return self._parse_field() 5789 5790 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5791 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5792 self._retreat(self._index - 1) 5793 return None 5794 5795 id_vars = self._parse_wrapped_id_vars() 5796 return self.expression( 5797 exp.PeriodForSystemTimeConstraint, 5798 this=seq_get(id_vars, 0), 5799 expression=seq_get(id_vars, 1), 5800 ) 5801 5802 def _parse_primary_key( 5803 self, wrapped_optional: bool = False, in_props: bool = False 5804 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5805 desc = ( 5806 self._match_set((TokenType.ASC, TokenType.DESC)) 5807 and self._prev.token_type == TokenType.DESC 5808 ) 5809 5810 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5811 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5812 5813 expressions = self._parse_wrapped_csv( 5814 self._parse_primary_key_part, optional=wrapped_optional 5815 ) 5816 options = self._parse_key_constraint_options() 5817 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5818 5819 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5820 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5821 5822 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5823 """ 5824 Parses a datetime column in ODBC format. We parse the column into the corresponding 5825 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5826 same as we did for `DATE('yyyy-mm-dd')`. 
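        Illustratively, the other standard ODBC markers are handled the same way:
        `{t 'hh:mm:ss'}` and `{ts 'yyyy-mm-dd hh:mm:ss'}` resolve through
        ODBC_DATETIME_LITERALS to the corresponding time and timestamp expressions
        (assuming those markers are registered in that mapping).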
5827 5828 Reference: 5829 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5830 """ 5831 self._match(TokenType.VAR) 5832 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5833 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5834 if not self._match(TokenType.R_BRACE): 5835 self.raise_error("Expected }") 5836 return expression 5837 5838 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5839 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5840 return this 5841 5842 bracket_kind = self._prev.token_type 5843 if ( 5844 bracket_kind == TokenType.L_BRACE 5845 and self._curr 5846 and self._curr.token_type == TokenType.VAR 5847 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5848 ): 5849 return self._parse_odbc_datetime_literal() 5850 5851 expressions = self._parse_csv( 5852 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5853 ) 5854 5855 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5856 self.raise_error("Expected ]") 5857 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5858 self.raise_error("Expected }") 5859 5860 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5861 if bracket_kind == TokenType.L_BRACE: 5862 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5863 elif not this: 5864 this = build_array_constructor( 5865 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5866 ) 5867 else: 5868 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5869 if constructor_type: 5870 return build_array_constructor( 5871 constructor_type, 5872 args=expressions, 5873 bracket_kind=bracket_kind, 5874 dialect=self.dialect, 5875 ) 5876 5877 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5878 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5879 5880 self._add_comments(this) 5881 return self._parse_bracket(this) 5882 5883 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5884 if self._match(TokenType.COLON): 5885 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5886 return this 5887 5888 def _parse_case(self) -> t.Optional[exp.Expression]: 5889 ifs = [] 5890 default = None 5891 5892 comments = self._prev_comments 5893 expression = self._parse_assignment() 5894 5895 while self._match(TokenType.WHEN): 5896 this = self._parse_assignment() 5897 self._match(TokenType.THEN) 5898 then = self._parse_assignment() 5899 ifs.append(self.expression(exp.If, this=this, true=then)) 5900 5901 if self._match(TokenType.ELSE): 5902 default = self._parse_assignment() 5903 5904 if not self._match(TokenType.END): 5905 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5906 default = exp.column("interval") 5907 else: 5908 self.raise_error("Expected END after CASE", self._prev) 5909 5910 return self.expression( 5911 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5912 ) 5913 5914 def _parse_if(self) -> t.Optional[exp.Expression]: 5915 if self._match(TokenType.L_PAREN): 5916 args = self._parse_csv(self._parse_assignment) 5917 this = self.validate_expression(exp.If.from_arg_list(args), args) 5918 self._match_r_paren() 5919 else: 5920 index = self._index - 1 5921 5922 if self.NO_PAREN_IF_COMMANDS and index == 0: 5923 
return self._parse_as_command(self._prev) 5924 5925 condition = self._parse_assignment() 5926 5927 if not condition: 5928 self._retreat(index) 5929 return None 5930 5931 self._match(TokenType.THEN) 5932 true = self._parse_assignment() 5933 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5934 self._match(TokenType.END) 5935 this = self.expression(exp.If, this=condition, true=true, false=false) 5936 5937 return this 5938 5939 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5940 if not self._match_text_seq("VALUE", "FOR"): 5941 self._retreat(self._index - 1) 5942 return None 5943 5944 return self.expression( 5945 exp.NextValueFor, 5946 this=self._parse_column(), 5947 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5948 ) 5949 5950 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5951 this = self._parse_function() or self._parse_var_or_string(upper=True) 5952 5953 if self._match(TokenType.FROM): 5954 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5955 5956 if not self._match(TokenType.COMMA): 5957 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5958 5959 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5960 5961 def _parse_gap_fill(self) -> exp.GapFill: 5962 self._match(TokenType.TABLE) 5963 this = self._parse_table() 5964 5965 self._match(TokenType.COMMA) 5966 args = [this, *self._parse_csv(self._parse_lambda)] 5967 5968 gap_fill = exp.GapFill.from_arg_list(args) 5969 return self.validate_expression(gap_fill, args) 5970 5971 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5972 this = self._parse_assignment() 5973 5974 if not self._match(TokenType.ALIAS): 5975 if self._match(TokenType.COMMA): 5976 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5977 5978 self.raise_error("Expected AS after CAST") 5979 5980 fmt = None 5981 to = self._parse_types() 5982 5983 if self._match(TokenType.FORMAT): 5984 fmt_string = self._parse_string() 5985 fmt = self._parse_at_time_zone(fmt_string) 5986 5987 if not to: 5988 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5989 if to.this in exp.DataType.TEMPORAL_TYPES: 5990 this = self.expression( 5991 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5992 this=this, 5993 format=exp.Literal.string( 5994 format_time( 5995 fmt_string.this if fmt_string else "", 5996 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5997 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5998 ) 5999 ), 6000 safe=safe, 6001 ) 6002 6003 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6004 this.set("zone", fmt.args["zone"]) 6005 return this 6006 elif not to: 6007 self.raise_error("Expected TYPE after CAST") 6008 elif isinstance(to, exp.Identifier): 6009 to = exp.DataType.build(to.name, udt=True) 6010 elif to.this == exp.DataType.Type.CHAR: 6011 if self._match(TokenType.CHARACTER_SET): 6012 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6013 6014 return self.expression( 6015 exp.Cast if strict else exp.TryCast, 6016 this=this, 6017 to=to, 6018 format=fmt, 6019 safe=safe, 6020 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6021 ) 6022 6023 def _parse_string_agg(self) -> exp.GroupConcat: 6024 if self._match(TokenType.DISTINCT): 6025 args: t.List[t.Optional[exp.Expression]] = [ 6026 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
6027 ] 6028 if self._match(TokenType.COMMA): 6029 args.extend(self._parse_csv(self._parse_assignment)) 6030 else: 6031 args = self._parse_csv(self._parse_assignment) # type: ignore 6032 6033 if self._match_text_seq("ON", "OVERFLOW"): 6034 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6035 if self._match_text_seq("ERROR"): 6036 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6037 else: 6038 self._match_text_seq("TRUNCATE") 6039 on_overflow = self.expression( 6040 exp.OverflowTruncateBehavior, 6041 this=self._parse_string(), 6042 with_count=( 6043 self._match_text_seq("WITH", "COUNT") 6044 or not self._match_text_seq("WITHOUT", "COUNT") 6045 ), 6046 ) 6047 else: 6048 on_overflow = None 6049 6050 index = self._index 6051 if not self._match(TokenType.R_PAREN) and args: 6052 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6053 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6054 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6055 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6056 6057 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6058 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6059 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6060 if not self._match_text_seq("WITHIN", "GROUP"): 6061 self._retreat(index) 6062 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6063 6064 # The corresponding match_r_paren will be called in parse_function (caller) 6065 self._match_l_paren() 6066 6067 return self.expression( 6068 exp.GroupConcat, 6069 this=self._parse_order(this=seq_get(args, 0)), 6070 separator=seq_get(args, 1), 6071 on_overflow=on_overflow, 6072 ) 6073 6074 def _parse_convert( 6075 self, strict: bool, safe: t.Optional[bool] = None 6076 ) -> t.Optional[exp.Expression]: 6077 this = self._parse_bitwise() 6078 6079 if self._match(TokenType.USING): 6080 to: t.Optional[exp.Expression] = self.expression( 6081 exp.CharacterSet, this=self._parse_var() 6082 ) 6083 elif self._match(TokenType.COMMA): 6084 to = self._parse_types() 6085 else: 6086 to = None 6087 6088 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6089 6090 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6091 """ 6092 There are generally two variants of the DECODE function: 6093 6094 - DECODE(bin, charset) 6095 - DECODE(expression, search, result [, search, result] ... [, default]) 6096 6097 The second variant will always be parsed into a CASE expression. Note that NULL 6098 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6099 instead of relying on pattern matching. 
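
        For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed into
        the equivalent of:

            CASE
                WHEN x = 1 THEN 'one'
                WHEN x IS NULL THEN 'none'
                ELSE 'other'
            END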
6100 """ 6101 args = self._parse_csv(self._parse_assignment) 6102 6103 if len(args) < 3: 6104 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6105 6106 expression, *expressions = args 6107 if not expression: 6108 return None 6109 6110 ifs = [] 6111 for search, result in zip(expressions[::2], expressions[1::2]): 6112 if not search or not result: 6113 return None 6114 6115 if isinstance(search, exp.Literal): 6116 ifs.append( 6117 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6118 ) 6119 elif isinstance(search, exp.Null): 6120 ifs.append( 6121 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6122 ) 6123 else: 6124 cond = exp.or_( 6125 exp.EQ(this=expression.copy(), expression=search), 6126 exp.and_( 6127 exp.Is(this=expression.copy(), expression=exp.Null()), 6128 exp.Is(this=search.copy(), expression=exp.Null()), 6129 copy=False, 6130 ), 6131 copy=False, 6132 ) 6133 ifs.append(exp.If(this=cond, true=result)) 6134 6135 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6136 6137 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6138 self._match_text_seq("KEY") 6139 key = self._parse_column() 6140 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6141 self._match_text_seq("VALUE") 6142 value = self._parse_bitwise() 6143 6144 if not key and not value: 6145 return None 6146 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6147 6148 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6149 if not this or not self._match_text_seq("FORMAT", "JSON"): 6150 return this 6151 6152 return self.expression(exp.FormatJson, this=this) 6153 6154 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6155 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6156 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6157 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6158 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6159 else: 6160 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6161 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6162 6163 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6164 6165 if not empty and not error and not null: 6166 return None 6167 6168 return self.expression( 6169 exp.OnCondition, 6170 empty=empty, 6171 error=error, 6172 null=null, 6173 ) 6174 6175 def _parse_on_handling( 6176 self, on: str, *values: str 6177 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6178 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6179 for value in values: 6180 if self._match_text_seq(value, "ON", on): 6181 return f"{value} ON {on}" 6182 6183 index = self._index 6184 if self._match(TokenType.DEFAULT): 6185 default_value = self._parse_bitwise() 6186 if self._match_text_seq("ON", on): 6187 return default_value 6188 6189 self._retreat(index) 6190 6191 return None 6192 6193 @t.overload 6194 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6195 6196 @t.overload 6197 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
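    # The overloads above only narrow the return type for type checkers: agg=False
    # produces an exp.JSONObject and agg=True an exp.JSONObjectAgg. The shared
    # implementation below handles both shapes, e.g. the standard
    # JSON_OBJECT(KEY 'a' VALUE 1) as well as its aggregate counterpart.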
6198 6199 def _parse_json_object(self, agg=False): 6200 star = self._parse_star() 6201 expressions = ( 6202 [star] 6203 if star 6204 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6205 ) 6206 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6207 6208 unique_keys = None 6209 if self._match_text_seq("WITH", "UNIQUE"): 6210 unique_keys = True 6211 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6212 unique_keys = False 6213 6214 self._match_text_seq("KEYS") 6215 6216 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6217 self._parse_type() 6218 ) 6219 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6220 6221 return self.expression( 6222 exp.JSONObjectAgg if agg else exp.JSONObject, 6223 expressions=expressions, 6224 null_handling=null_handling, 6225 unique_keys=unique_keys, 6226 return_type=return_type, 6227 encoding=encoding, 6228 ) 6229 6230 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6231 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6232 if not self._match_text_seq("NESTED"): 6233 this = self._parse_id_var() 6234 kind = self._parse_types(allow_identifiers=False) 6235 nested = None 6236 else: 6237 this = None 6238 kind = None 6239 nested = True 6240 6241 path = self._match_text_seq("PATH") and self._parse_string() 6242 nested_schema = nested and self._parse_json_schema() 6243 6244 return self.expression( 6245 exp.JSONColumnDef, 6246 this=this, 6247 kind=kind, 6248 path=path, 6249 nested_schema=nested_schema, 6250 ) 6251 6252 def _parse_json_schema(self) -> exp.JSONSchema: 6253 self._match_text_seq("COLUMNS") 6254 return self.expression( 6255 exp.JSONSchema, 6256 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6257 ) 6258 6259 def _parse_json_table(self) -> exp.JSONTable: 6260 this = self._parse_format_json(self._parse_bitwise()) 6261 path = self._match(TokenType.COMMA) and self._parse_string() 6262 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6263 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6264 schema = self._parse_json_schema() 6265 6266 return exp.JSONTable( 6267 this=this, 6268 schema=schema, 6269 path=path, 6270 error_handling=error_handling, 6271 empty_handling=empty_handling, 6272 ) 6273 6274 def _parse_match_against(self) -> exp.MatchAgainst: 6275 expressions = self._parse_csv(self._parse_column) 6276 6277 self._match_text_seq(")", "AGAINST", "(") 6278 6279 this = self._parse_string() 6280 6281 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6282 modifier = "IN NATURAL LANGUAGE MODE" 6283 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6284 modifier = f"{modifier} WITH QUERY EXPANSION" 6285 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6286 modifier = "IN BOOLEAN MODE" 6287 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6288 modifier = "WITH QUERY EXPANSION" 6289 else: 6290 modifier = None 6291 6292 return self.expression( 6293 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6294 ) 6295 6296 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6297 def _parse_open_json(self) -> exp.OpenJSON: 6298 this = self._parse_bitwise() 6299 path = self._match(TokenType.COMMA) and self._parse_string() 6300 6301 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6302 this = self._parse_field(any_token=True) 6303 kind = self._parse_types() 6304 path = 
self._parse_string() 6305 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6306 6307 return self.expression( 6308 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6309 ) 6310 6311 expressions = None 6312 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6313 self._match_l_paren() 6314 expressions = self._parse_csv(_parse_open_json_column_def) 6315 6316 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6317 6318 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6319 args = self._parse_csv(self._parse_bitwise) 6320 6321 if self._match(TokenType.IN): 6322 return self.expression( 6323 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6324 ) 6325 6326 if haystack_first: 6327 haystack = seq_get(args, 0) 6328 needle = seq_get(args, 1) 6329 else: 6330 needle = seq_get(args, 0) 6331 haystack = seq_get(args, 1) 6332 6333 return self.expression( 6334 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6335 ) 6336 6337 def _parse_predict(self) -> exp.Predict: 6338 self._match_text_seq("MODEL") 6339 this = self._parse_table() 6340 6341 self._match(TokenType.COMMA) 6342 self._match_text_seq("TABLE") 6343 6344 return self.expression( 6345 exp.Predict, 6346 this=this, 6347 expression=self._parse_table(), 6348 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6349 ) 6350 6351 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6352 args = self._parse_csv(self._parse_table) 6353 return exp.JoinHint(this=func_name.upper(), expressions=args) 6354 6355 def _parse_substring(self) -> exp.Substring: 6356 # Postgres supports the form: substring(string [from int] [for int]) 6357 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6358 6359 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6360 6361 if self._match(TokenType.FROM): 6362 args.append(self._parse_bitwise()) 6363 if self._match(TokenType.FOR): 6364 if len(args) == 1: 6365 args.append(exp.Literal.number(1)) 6366 args.append(self._parse_bitwise()) 6367 6368 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6369 6370 def _parse_trim(self) -> exp.Trim: 6371 # https://www.w3resource.com/sql/character-functions/trim.php 6372 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6373 6374 position = None 6375 collation = None 6376 expression = None 6377 6378 if self._match_texts(self.TRIM_TYPES): 6379 position = self._prev.text.upper() 6380 6381 this = self._parse_bitwise() 6382 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6383 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6384 expression = self._parse_bitwise() 6385 6386 if invert_order: 6387 this, expression = expression, this 6388 6389 if self._match(TokenType.COLLATE): 6390 collation = self._parse_bitwise() 6391 6392 return self.expression( 6393 exp.Trim, this=this, position=position, expression=expression, collation=collation 6394 ) 6395 6396 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6397 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6398 6399 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6400 return self._parse_window(self._parse_id_var(), alias=True) 6401 6402 def _parse_respect_or_ignore_nulls( 6403 self, this: t.Optional[exp.Expression] 6404 ) -> t.Optional[exp.Expression]: 6405 if self._match_text_seq("IGNORE", "NULLS"): 
6406 return self.expression(exp.IgnoreNulls, this=this) 6407 if self._match_text_seq("RESPECT", "NULLS"): 6408 return self.expression(exp.RespectNulls, this=this) 6409 return this 6410 6411 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6412 if self._match(TokenType.HAVING): 6413 self._match_texts(("MAX", "MIN")) 6414 max = self._prev.text.upper() != "MIN" 6415 return self.expression( 6416 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6417 ) 6418 6419 return this 6420 6421 def _parse_window( 6422 self, this: t.Optional[exp.Expression], alias: bool = False 6423 ) -> t.Optional[exp.Expression]: 6424 func = this 6425 comments = func.comments if isinstance(func, exp.Expression) else None 6426 6427 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6428 self._match(TokenType.WHERE) 6429 this = self.expression( 6430 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6431 ) 6432 self._match_r_paren() 6433 6434 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6435 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6436 if self._match_text_seq("WITHIN", "GROUP"): 6437 order = self._parse_wrapped(self._parse_order) 6438 this = self.expression(exp.WithinGroup, this=this, expression=order) 6439 6440 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6441 # Some dialects choose to implement and some do not. 6442 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6443 6444 # There is some code above in _parse_lambda that handles 6445 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6446 6447 # The below changes handle 6448 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6449 6450 # Oracle allows both formats 6451 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6452 # and Snowflake chose to do the same for familiarity 6453 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6454 if isinstance(this, exp.AggFunc): 6455 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6456 6457 if ignore_respect and ignore_respect is not this: 6458 ignore_respect.replace(ignore_respect.this) 6459 this = self.expression(ignore_respect.__class__, this=this) 6460 6461 this = self._parse_respect_or_ignore_nulls(this) 6462 6463 # bigquery select from window x AS (partition by ...) 
6464 if alias: 6465 over = None 6466 self._match(TokenType.ALIAS) 6467 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6468 return this 6469 else: 6470 over = self._prev.text.upper() 6471 6472 if comments and isinstance(func, exp.Expression): 6473 func.pop_comments() 6474 6475 if not self._match(TokenType.L_PAREN): 6476 return self.expression( 6477 exp.Window, 6478 comments=comments, 6479 this=this, 6480 alias=self._parse_id_var(False), 6481 over=over, 6482 ) 6483 6484 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6485 6486 first = self._match(TokenType.FIRST) 6487 if self._match_text_seq("LAST"): 6488 first = False 6489 6490 partition, order = self._parse_partition_and_order() 6491 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6492 6493 if kind: 6494 self._match(TokenType.BETWEEN) 6495 start = self._parse_window_spec() 6496 self._match(TokenType.AND) 6497 end = self._parse_window_spec() 6498 6499 spec = self.expression( 6500 exp.WindowSpec, 6501 kind=kind, 6502 start=start["value"], 6503 start_side=start["side"], 6504 end=end["value"], 6505 end_side=end["side"], 6506 ) 6507 else: 6508 spec = None 6509 6510 self._match_r_paren() 6511 6512 window = self.expression( 6513 exp.Window, 6514 comments=comments, 6515 this=this, 6516 partition_by=partition, 6517 order=order, 6518 spec=spec, 6519 alias=window_alias, 6520 over=over, 6521 first=first, 6522 ) 6523 6524 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6525 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6526 return self._parse_window(window, alias=alias) 6527 6528 return window 6529 6530 def _parse_partition_and_order( 6531 self, 6532 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6533 return self._parse_partition_by(), self._parse_order() 6534 6535 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6536 self._match(TokenType.BETWEEN) 6537 6538 return { 6539 "value": ( 6540 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6541 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6542 or self._parse_bitwise() 6543 ), 6544 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6545 } 6546 6547 def _parse_alias( 6548 self, this: t.Optional[exp.Expression], explicit: bool = False 6549 ) -> t.Optional[exp.Expression]: 6550 any_token = self._match(TokenType.ALIAS) 6551 comments = self._prev_comments or [] 6552 6553 if explicit and not any_token: 6554 return this 6555 6556 if self._match(TokenType.L_PAREN): 6557 aliases = self.expression( 6558 exp.Aliases, 6559 comments=comments, 6560 this=this, 6561 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6562 ) 6563 self._match_r_paren(aliases) 6564 return aliases 6565 6566 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6567 self.STRING_ALIASES and self._parse_string_as_identifier() 6568 ) 6569 6570 if alias: 6571 comments.extend(alias.pop_comments()) 6572 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6573 column = this.this 6574 6575 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6576 if not this.comments and column and column.comments: 6577 this.comments = column.pop_comments() 6578 6579 return this 6580 6581 def _parse_id_var( 6582 self, 6583 any_token: bool = True, 6584 tokens: t.Optional[t.Collection[TokenType]] = None, 6585 ) -> t.Optional[exp.Expression]: 6586 expression = self._parse_identifier() 6587 if 
not expression and ( 6588 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6589 ): 6590 quoted = self._prev.token_type == TokenType.STRING 6591 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6592 6593 return expression 6594 6595 def _parse_string(self) -> t.Optional[exp.Expression]: 6596 if self._match_set(self.STRING_PARSERS): 6597 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6598 return self._parse_placeholder() 6599 6600 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6601 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6602 6603 def _parse_number(self) -> t.Optional[exp.Expression]: 6604 if self._match_set(self.NUMERIC_PARSERS): 6605 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6606 return self._parse_placeholder() 6607 6608 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6609 if self._match(TokenType.IDENTIFIER): 6610 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6611 return self._parse_placeholder() 6612 6613 def _parse_var( 6614 self, 6615 any_token: bool = False, 6616 tokens: t.Optional[t.Collection[TokenType]] = None, 6617 upper: bool = False, 6618 ) -> t.Optional[exp.Expression]: 6619 if ( 6620 (any_token and self._advance_any()) 6621 or self._match(TokenType.VAR) 6622 or (self._match_set(tokens) if tokens else False) 6623 ): 6624 return self.expression( 6625 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6626 ) 6627 return self._parse_placeholder() 6628 6629 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6630 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6631 self._advance() 6632 return self._prev 6633 return None 6634 6635 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6636 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6637 6638 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6639 return self._parse_primary() or self._parse_var(any_token=True) 6640 6641 def _parse_null(self) -> t.Optional[exp.Expression]: 6642 if self._match_set(self.NULL_TOKENS): 6643 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6644 return self._parse_placeholder() 6645 6646 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6647 if self._match(TokenType.TRUE): 6648 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6649 if self._match(TokenType.FALSE): 6650 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6651 return self._parse_placeholder() 6652 6653 def _parse_star(self) -> t.Optional[exp.Expression]: 6654 if self._match(TokenType.STAR): 6655 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6656 return self._parse_placeholder() 6657 6658 def _parse_parameter(self) -> exp.Parameter: 6659 this = self._parse_identifier() or self._parse_primary_or_var() 6660 return self.expression(exp.Parameter, this=this) 6661 6662 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6663 if self._match_set(self.PLACEHOLDER_PARSERS): 6664 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6665 if placeholder: 6666 return placeholder 6667 self._advance(-1) 6668 return None 6669 6670 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6671 if not self._match_texts(keywords): 6672 return None 6673 if self._match(TokenType.L_PAREN, 
advance=False): 6674 return self._parse_wrapped_csv(self._parse_expression) 6675 6676 expression = self._parse_expression() 6677 return [expression] if expression else None 6678 6679 def _parse_csv( 6680 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6681 ) -> t.List[exp.Expression]: 6682 parse_result = parse_method() 6683 items = [parse_result] if parse_result is not None else [] 6684 6685 while self._match(sep): 6686 self._add_comments(parse_result) 6687 parse_result = parse_method() 6688 if parse_result is not None: 6689 items.append(parse_result) 6690 6691 return items 6692 6693 def _parse_tokens( 6694 self, parse_method: t.Callable, expressions: t.Dict 6695 ) -> t.Optional[exp.Expression]: 6696 this = parse_method() 6697 6698 while self._match_set(expressions): 6699 this = self.expression( 6700 expressions[self._prev.token_type], 6701 this=this, 6702 comments=self._prev_comments, 6703 expression=parse_method(), 6704 ) 6705 6706 return this 6707 6708 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6709 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6710 6711 def _parse_wrapped_csv( 6712 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6713 ) -> t.List[exp.Expression]: 6714 return self._parse_wrapped( 6715 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6716 ) 6717 6718 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6719 wrapped = self._match(TokenType.L_PAREN) 6720 if not wrapped and not optional: 6721 self.raise_error("Expecting (") 6722 parse_result = parse_method() 6723 if wrapped: 6724 self._match_r_paren() 6725 return parse_result 6726 6727 def _parse_expressions(self) -> t.List[exp.Expression]: 6728 return self._parse_csv(self._parse_expression) 6729 6730 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6731 return self._parse_select() or self._parse_set_operations( 6732 self._parse_expression() if alias else self._parse_assignment() 6733 ) 6734 6735 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6736 return self._parse_query_modifiers( 6737 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6738 ) 6739 6740 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6741 this = None 6742 if self._match_texts(self.TRANSACTION_KIND): 6743 this = self._prev.text 6744 6745 self._match_texts(("TRANSACTION", "WORK")) 6746 6747 modes = [] 6748 while True: 6749 mode = [] 6750 while self._match(TokenType.VAR): 6751 mode.append(self._prev.text) 6752 6753 if mode: 6754 modes.append(" ".join(mode)) 6755 if not self._match(TokenType.COMMA): 6756 break 6757 6758 return self.expression(exp.Transaction, this=this, modes=modes) 6759 6760 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6761 chain = None 6762 savepoint = None 6763 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6764 6765 self._match_texts(("TRANSACTION", "WORK")) 6766 6767 if self._match_text_seq("TO"): 6768 self._match_text_seq("SAVEPOINT") 6769 savepoint = self._parse_id_var() 6770 6771 if self._match(TokenType.AND): 6772 chain = not self._match_text_seq("NO") 6773 self._match_text_seq("CHAIN") 6774 6775 if is_rollback: 6776 return self.expression(exp.Rollback, savepoint=savepoint) 6777 6778 return self.expression(exp.Commit, chain=chain) 6779 6780 def _parse_refresh(self) -> exp.Refresh: 6781 self._match(TokenType.TABLE) 6782 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6783 6784 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6785 if not self._match_text_seq("ADD"): 6786 return None 6787 6788 self._match(TokenType.COLUMN) 6789 exists_column = self._parse_exists(not_=True) 6790 expression = self._parse_field_def() 6791 6792 if expression: 6793 expression.set("exists", exists_column) 6794 6795 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6796 if self._match_texts(("FIRST", "AFTER")): 6797 position = self._prev.text 6798 column_position = self.expression( 6799 exp.ColumnPosition, this=self._parse_column(), position=position 6800 ) 6801 expression.set("position", column_position) 6802 6803 return expression 6804 6805 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6806 drop = self._match(TokenType.DROP) and self._parse_drop() 6807 if drop and not isinstance(drop, exp.Command): 6808 drop.set("kind", drop.args.get("kind", "COLUMN")) 6809 return drop 6810 6811 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6812 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6813 return self.expression( 6814 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6815 ) 6816 6817 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6818 index = self._index - 1 6819 6820 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6821 return self._parse_csv( 6822 lambda: self.expression( 6823 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6824 ) 6825 ) 6826 6827 self._retreat(index) 6828 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6829 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6830 6831 if self._match_text_seq("ADD", "COLUMNS"): 6832 schema = self._parse_schema() 6833 if schema: 6834 return [schema] 6835 return [] 6836 6837 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6838 6839 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6840 if self._match_texts(self.ALTER_ALTER_PARSERS): 6841 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6842 6843 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6844 # keyword after ALTER we default to parsing this statement 6845 self._match(TokenType.COLUMN) 6846 column = self._parse_field(any_token=True) 6847 6848 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6849 return self.expression(exp.AlterColumn, this=column, drop=True) 6850 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6851 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6852 if self._match(TokenType.COMMENT): 6853 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6854 if self._match_text_seq("DROP", "NOT", "NULL"): 6855 return self.expression( 6856 exp.AlterColumn, 6857 this=column, 6858 drop=True, 6859 allow_null=True, 6860 ) 6861 if self._match_text_seq("SET", "NOT", "NULL"): 6862 return self.expression( 6863 exp.AlterColumn, 6864 this=column, 6865 allow_null=False, 6866 ) 6867 self._match_text_seq("SET", "DATA") 6868 self._match_text_seq("TYPE") 6869 return self.expression( 6870 exp.AlterColumn, 6871 this=column, 6872 dtype=self._parse_types(), 6873 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6874 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6875 ) 6876 6877 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6878 if self._match_texts(("ALL", "EVEN", "AUTO")): 6879 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6880 6881 self._match_text_seq("KEY", "DISTKEY") 6882 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6883 6884 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6885 if compound: 6886 self._match_text_seq("SORTKEY") 6887 6888 if self._match(TokenType.L_PAREN, advance=False): 6889 return self.expression( 6890 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6891 ) 6892 6893 self._match_texts(("AUTO", "NONE")) 6894 return self.expression( 6895 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6896 ) 6897 6898 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6899 index = self._index - 1 6900 6901 partition_exists = self._parse_exists() 6902 if self._match(TokenType.PARTITION, advance=False): 6903 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6904 6905 self._retreat(index) 6906 return self._parse_csv(self._parse_drop_column) 6907 6908 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6909 if self._match(TokenType.COLUMN): 6910 exists = self._parse_exists() 6911 old_column = self._parse_column() 6912 to = self._match_text_seq("TO") 6913 new_column = self._parse_column() 6914 6915 if old_column is None or to is None or new_column is None: 6916 return None 6917 6918 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6919 6920 self._match_text_seq("TO") 6921 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6922 6923 def _parse_alter_table_set(self) -> exp.AlterSet: 6924 alter_set = self.expression(exp.AlterSet) 6925 6926 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6927 "TABLE", "PROPERTIES" 6928 ): 6929 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6930 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6931 alter_set.set("expressions", [self._parse_assignment()]) 6932 elif self._match_texts(("LOGGED", "UNLOGGED")): 6933 alter_set.set("option", exp.var(self._prev.text.upper())) 6934 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6935 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6936 elif self._match_text_seq("LOCATION"): 6937 alter_set.set("location", self._parse_field()) 6938 elif self._match_text_seq("ACCESS", "METHOD"): 6939 alter_set.set("access_method", self._parse_field()) 6940 elif self._match_text_seq("TABLESPACE"): 6941 alter_set.set("tablespace", self._parse_field()) 6942 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6943 alter_set.set("file_format", [self._parse_field()]) 6944 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6945 alter_set.set("file_format", self._parse_wrapped_options()) 6946 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6947 alter_set.set("copy_options", self._parse_wrapped_options()) 6948 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6949 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6950 else: 6951 if self._match_text_seq("SERDE"): 6952 alter_set.set("serde", self._parse_field()) 6953 6954 alter_set.set("expressions", [self._parse_properties()]) 6955 6956 return 
alter_set 6957 6958 def _parse_alter(self) -> exp.Alter | exp.Command: 6959 start = self._prev 6960 6961 alter_token = self._match_set(self.ALTERABLES) and self._prev 6962 if not alter_token: 6963 return self._parse_as_command(start) 6964 6965 exists = self._parse_exists() 6966 only = self._match_text_seq("ONLY") 6967 this = self._parse_table(schema=True) 6968 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6969 6970 if self._next: 6971 self._advance() 6972 6973 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6974 if parser: 6975 actions = ensure_list(parser(self)) 6976 not_valid = self._match_text_seq("NOT", "VALID") 6977 options = self._parse_csv(self._parse_property) 6978 6979 if not self._curr and actions: 6980 return self.expression( 6981 exp.Alter, 6982 this=this, 6983 kind=alter_token.text.upper(), 6984 exists=exists, 6985 actions=actions, 6986 only=only, 6987 options=options, 6988 cluster=cluster, 6989 not_valid=not_valid, 6990 ) 6991 6992 return self._parse_as_command(start) 6993 6994 def _parse_merge(self) -> exp.Merge: 6995 self._match(TokenType.INTO) 6996 target = self._parse_table() 6997 6998 if target and self._match(TokenType.ALIAS, advance=False): 6999 target.set("alias", self._parse_table_alias()) 7000 7001 self._match(TokenType.USING) 7002 using = self._parse_table() 7003 7004 self._match(TokenType.ON) 7005 on = self._parse_assignment() 7006 7007 return self.expression( 7008 exp.Merge, 7009 this=target, 7010 using=using, 7011 on=on, 7012 expressions=self._parse_when_matched(), 7013 returning=self._parse_returning(), 7014 ) 7015 7016 def _parse_when_matched(self) -> t.List[exp.When]: 7017 whens = [] 7018 7019 while self._match(TokenType.WHEN): 7020 matched = not self._match(TokenType.NOT) 7021 self._match_text_seq("MATCHED") 7022 source = ( 7023 False 7024 if self._match_text_seq("BY", "TARGET") 7025 else self._match_text_seq("BY", "SOURCE") 7026 ) 7027 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7028 7029 self._match(TokenType.THEN) 7030 7031 if self._match(TokenType.INSERT): 7032 this = self._parse_star() 7033 if this: 7034 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7035 else: 7036 then = self.expression( 7037 exp.Insert, 7038 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7039 expression=self._match_text_seq("VALUES") and self._parse_value(), 7040 ) 7041 elif self._match(TokenType.UPDATE): 7042 expressions = self._parse_star() 7043 if expressions: 7044 then = self.expression(exp.Update, expressions=expressions) 7045 else: 7046 then = self.expression( 7047 exp.Update, 7048 expressions=self._match(TokenType.SET) 7049 and self._parse_csv(self._parse_equality), 7050 ) 7051 elif self._match(TokenType.DELETE): 7052 then = self.expression(exp.Var, this=self._prev.text) 7053 else: 7054 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7055 7056 whens.append( 7057 self.expression( 7058 exp.When, 7059 matched=matched, 7060 source=source, 7061 condition=condition, 7062 then=then, 7063 ) 7064 ) 7065 return whens 7066 7067 def _parse_show(self) -> t.Optional[exp.Expression]: 7068 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7069 if parser: 7070 return parser(self) 7071 return self._parse_as_command(self._prev) 7072 7073 def _parse_set_item_assignment( 7074 self, kind: t.Optional[str] = None 7075 ) -> t.Optional[exp.Expression]: 7076 index = self._index 7077 7078 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 7079 return self._parse_set_transaction(global_=kind == "GLOBAL") 7080 7081 left = self._parse_primary() or self._parse_column() 7082 assignment_delimiter = self._match_texts(("=", "TO")) 7083 7084 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7085 self._retreat(index) 7086 return None 7087 7088 right = self._parse_statement() or self._parse_id_var() 7089 if isinstance(right, (exp.Column, exp.Identifier)): 7090 right = exp.var(right.name) 7091 7092 this = self.expression(exp.EQ, this=left, expression=right) 7093 return self.expression(exp.SetItem, this=this, kind=kind) 7094 7095 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7096 self._match_text_seq("TRANSACTION") 7097 characteristics = self._parse_csv( 7098 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7099 ) 7100 return self.expression( 7101 exp.SetItem, 7102 expressions=characteristics, 7103 kind="TRANSACTION", 7104 **{"global": global_}, # type: ignore 7105 ) 7106 7107 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7108 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7109 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7110 7111 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7112 index = self._index 7113 set_ = self.expression( 7114 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7115 ) 7116 7117 if self._curr: 7118 self._retreat(index) 7119 return self._parse_as_command(self._prev) 7120 7121 return set_ 7122 7123 def _parse_var_from_options( 7124 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7125 ) -> t.Optional[exp.Var]: 7126 start = self._curr 7127 if not start: 7128 return None 7129 7130 option = start.text.upper() 7131 continuations = options.get(option) 7132 7133 index = self._index 7134 self._advance() 7135 for keywords in continuations or []: 7136 if isinstance(keywords, str): 7137 keywords = (keywords,) 7138 7139 if self._match_text_seq(*keywords): 7140 option = f"{option} {' '.join(keywords)}" 7141 break 7142 else: 7143 if continuations or continuations is None: 7144 if raise_unmatched: 7145 self.raise_error(f"Unknown option {option}") 7146 7147 self._retreat(index) 7148 return None 7149 7150 return exp.var(option) 7151 7152 def _parse_as_command(self, start: Token) -> exp.Command: 7153 while self._curr: 7154 self._advance() 7155 text = self._find_sql(start, self._prev) 7156 size = len(start.text) 7157 self._warn_unsupported() 7158 return exp.Command(this=text[:size], expression=text[size:]) 7159 7160 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7161 settings = [] 7162 7163 self._match_l_paren() 7164 kind = self._parse_id_var() 7165 7166 if self._match(TokenType.L_PAREN): 7167 while True: 7168 key = self._parse_id_var() 7169 value = self._parse_primary() 7170 if not key and value is None: 7171 break 7172 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7173 self._match(TokenType.R_PAREN) 7174 7175 self._match_r_paren() 7176 7177 return self.expression( 7178 exp.DictProperty, 7179 this=this, 7180 kind=kind.this if kind else None, 7181 settings=settings, 7182 ) 7183 7184 def _parse_dict_range(self, this: str) -> exp.DictRange: 7185 self._match_l_paren() 7186 has_min = self._match_text_seq("MIN") 7187 if has_min: 7188 min = self._parse_var() or self._parse_primary() 7189 self._match_text_seq("MAX") 7190 max = 
self._parse_var() or self._parse_primary() 7191 else: 7192 max = self._parse_var() or self._parse_primary() 7193 min = exp.Literal.number(0) 7194 self._match_r_paren() 7195 return self.expression(exp.DictRange, this=this, min=min, max=max) 7196 7197 def _parse_comprehension( 7198 self, this: t.Optional[exp.Expression] 7199 ) -> t.Optional[exp.Comprehension]: 7200 index = self._index 7201 expression = self._parse_column() 7202 if not self._match(TokenType.IN): 7203 self._retreat(index - 1) 7204 return None 7205 iterator = self._parse_column() 7206 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7207 return self.expression( 7208 exp.Comprehension, 7209 this=this, 7210 expression=expression, 7211 iterator=iterator, 7212 condition=condition, 7213 ) 7214 7215 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7216 if self._match(TokenType.HEREDOC_STRING): 7217 return self.expression(exp.Heredoc, this=self._prev.text) 7218 7219 if not self._match_text_seq("$"): 7220 return None 7221 7222 tags = ["$"] 7223 tag_text = None 7224 7225 if self._is_connected(): 7226 self._advance() 7227 tags.append(self._prev.text.upper()) 7228 else: 7229 self.raise_error("No closing $ found") 7230 7231 if tags[-1] != "$": 7232 if self._is_connected() and self._match_text_seq("$"): 7233 tag_text = tags[-1] 7234 tags.append("$") 7235 else: 7236 self.raise_error("No closing $ found") 7237 7238 heredoc_start = self._curr 7239 7240 while self._curr: 7241 if self._match_text_seq(*tags, advance=False): 7242 this = self._find_sql(heredoc_start, self._prev) 7243 self._advance(len(tags)) 7244 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7245 7246 self._advance() 7247 7248 self.raise_error(f"No closing {''.join(tags)} found") 7249 return None 7250 7251 def _find_parser( 7252 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7253 ) -> t.Optional[t.Callable]: 7254 if not self._curr: 7255 return None 7256 7257 index = self._index 7258 this = [] 7259 while True: 7260 # The current token might be multiple words 7261 curr = self._curr.text.upper() 7262 key = curr.split(" ") 7263 this.append(curr) 7264 7265 self._advance() 7266 result, trie = in_trie(trie, key) 7267 if result == TrieResult.FAILED: 7268 break 7269 7270 if result == TrieResult.EXISTS: 7271 subparser = parsers[" ".join(this)] 7272 return subparser 7273 7274 self._retreat(index) 7275 return None 7276 7277 def _match(self, token_type, advance=True, expression=None): 7278 if not self._curr: 7279 return None 7280 7281 if self._curr.token_type == token_type: 7282 if advance: 7283 self._advance() 7284 self._add_comments(expression) 7285 return True 7286 7287 return None 7288 7289 def _match_set(self, types, advance=True): 7290 if not self._curr: 7291 return None 7292 7293 if self._curr.token_type in types: 7294 if advance: 7295 self._advance() 7296 return True 7297 7298 return None 7299 7300 def _match_pair(self, token_type_a, token_type_b, advance=True): 7301 if not self._curr or not self._next: 7302 return None 7303 7304 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7305 if advance: 7306 self._advance(2) 7307 return True 7308 7309 return None 7310 7311 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7312 if not self._match(TokenType.L_PAREN, expression=expression): 7313 self.raise_error("Expecting (") 7314 7315 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7316 if not self._match(TokenType.R_PAREN, expression=expression): 
7317 self.raise_error("Expecting )") 7318 7319 def _match_texts(self, texts, advance=True): 7320 if ( 7321 self._curr 7322 and self._curr.token_type != TokenType.STRING 7323 and self._curr.text.upper() in texts 7324 ): 7325 if advance: 7326 self._advance() 7327 return True 7328 return None 7329 7330 def _match_text_seq(self, *texts, advance=True): 7331 index = self._index 7332 for text in texts: 7333 if ( 7334 self._curr 7335 and self._curr.token_type != TokenType.STRING 7336 and self._curr.text.upper() == text 7337 ): 7338 self._advance() 7339 else: 7340 self._retreat(index) 7341 return None 7342 7343 if not advance: 7344 self._retreat(index) 7345 7346 return True 7347 7348 def _replace_lambda( 7349 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7350 ) -> t.Optional[exp.Expression]: 7351 if not node: 7352 return node 7353 7354 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7355 7356 for column in node.find_all(exp.Column): 7357 typ = lambda_types.get(column.parts[0].name) 7358 if typ is not None: 7359 dot_or_id = column.to_dot() if column.table else column.this 7360 7361 if typ: 7362 dot_or_id = self.expression( 7363 exp.Cast, 7364 this=dot_or_id, 7365 to=typ, 7366 ) 7367 7368 parent = column.parent 7369 7370 while isinstance(parent, exp.Dot): 7371 if not isinstance(parent.parent, exp.Dot): 7372 parent.replace(dot_or_id) 7373 break 7374 parent = parent.parent 7375 else: 7376 if column is node: 7377 node = dot_or_id 7378 else: 7379 column.replace(dot_or_id) 7380 return node 7381 7382 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7383 start = self._prev 7384 7385 # Not to be confused with TRUNCATE(number, decimals) function call 7386 if self._match(TokenType.L_PAREN): 7387 self._retreat(self._index - 2) 7388 return self._parse_function() 7389 7390 # Clickhouse supports TRUNCATE DATABASE as well 7391 is_database = self._match(TokenType.DATABASE) 7392 7393 self._match(TokenType.TABLE) 7394 7395 exists = self._parse_exists(not_=False) 7396 7397 expressions = self._parse_csv( 7398 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7399 ) 7400 7401 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7402 7403 if self._match_text_seq("RESTART", "IDENTITY"): 7404 identity = "RESTART" 7405 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7406 identity = "CONTINUE" 7407 else: 7408 identity = None 7409 7410 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7411 option = self._prev.text 7412 else: 7413 option = None 7414 7415 partition = self._parse_partition() 7416 7417 # Fallback case 7418 if self._curr: 7419 return self._parse_as_command(start) 7420 7421 return self.expression( 7422 exp.TruncateTable, 7423 expressions=expressions, 7424 is_database=is_database, 7425 exists=exists, 7426 cluster=cluster, 7427 identity=identity, 7428 option=option, 7429 partition=partition, 7430 ) 7431 7432 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7433 this = self._parse_ordered(self._parse_opclass) 7434 7435 if not self._match(TokenType.WITH): 7436 return this 7437 7438 op = self._parse_var(any_token=True) 7439 7440 return self.expression(exp.WithOperator, this=this, op=op) 7441 7442 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7443 self._match(TokenType.EQ) 7444 self._match(TokenType.L_PAREN) 7445 7446 opts: t.List[t.Optional[exp.Expression]] = [] 7447 while self._curr and not self._match(TokenType.R_PAREN): 7448 if 
self._match_text_seq("FORMAT_NAME", "="): 7449 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7450 # so we parse it separately to use _parse_field() 7451 prop = self.expression( 7452 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7453 ) 7454 opts.append(prop) 7455 else: 7456 opts.append(self._parse_property()) 7457 7458 self._match(TokenType.COMMA) 7459 7460 return opts 7461 7462 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7463 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7464 7465 options = [] 7466 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7467 option = self._parse_var(any_token=True) 7468 prev = self._prev.text.upper() 7469 7470 # Different dialects might separate options and values by white space, "=" and "AS" 7471 self._match(TokenType.EQ) 7472 self._match(TokenType.ALIAS) 7473 7474 param = self.expression(exp.CopyParameter, this=option) 7475 7476 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7477 TokenType.L_PAREN, advance=False 7478 ): 7479 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7480 param.set("expressions", self._parse_wrapped_options()) 7481 elif prev == "FILE_FORMAT": 7482 # T-SQL's external file format case 7483 param.set("expression", self._parse_field()) 7484 else: 7485 param.set("expression", self._parse_unquoted_field()) 7486 7487 options.append(param) 7488 self._match(sep) 7489 7490 return options 7491 7492 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7493 expr = self.expression(exp.Credentials) 7494 7495 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7496 expr.set("storage", self._parse_field()) 7497 if self._match_text_seq("CREDENTIALS"): 7498 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7499 creds = ( 7500 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7501 ) 7502 expr.set("credentials", creds) 7503 if self._match_text_seq("ENCRYPTION"): 7504 expr.set("encryption", self._parse_wrapped_options()) 7505 if self._match_text_seq("IAM_ROLE"): 7506 expr.set("iam_role", self._parse_field()) 7507 if self._match_text_seq("REGION"): 7508 expr.set("region", self._parse_field()) 7509 7510 return expr 7511 7512 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7513 return self._parse_field() 7514 7515 def _parse_copy(self) -> exp.Copy | exp.Command: 7516 start = self._prev 7517 7518 self._match(TokenType.INTO) 7519 7520 this = ( 7521 self._parse_select(nested=True, parse_subquery_alias=False) 7522 if self._match(TokenType.L_PAREN, advance=False) 7523 else self._parse_table(schema=True) 7524 ) 7525 7526 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7527 7528 files = self._parse_csv(self._parse_file_location) 7529 credentials = self._parse_credentials() 7530 7531 self._match_text_seq("WITH") 7532 7533 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7534 7535 # Fallback case 7536 if self._curr: 7537 return self._parse_as_command(start) 7538 7539 return self.expression( 7540 exp.Copy, 7541 this=this, 7542 kind=kind, 7543 credentials=credentials, 7544 files=files, 7545 params=params, 7546 ) 7547 7548 def _parse_normalize(self) -> exp.Normalize: 7549 return self.expression( 7550 exp.Normalize, 7551 this=self._parse_bitwise(), 7552 form=self._match(TokenType.COMMA) and self._parse_var(), 7553 ) 7554 7555 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7556 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7557 this = self._parse_function() 7558 if isinstance(this, exp.Columns): 7559 this.set("unpack", True) 7560 return this 7561 7562 return self.expression( 7563 exp.Star, 7564 **{ # type: ignore 7565 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7566 "replace": self._parse_star_op("REPLACE"), 7567 "rename": self._parse_star_op("RENAME"), 7568 }, 7569 ) 7570 7571 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7572 privilege_parts = [] 7573 7574 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7575 # (end of privilege list) or L_PAREN (start of column list) are met 7576 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7577 privilege_parts.append(self._curr.text.upper()) 7578 self._advance() 7579 7580 this = exp.var(" ".join(privilege_parts)) 7581 expressions = ( 7582 self._parse_wrapped_csv(self._parse_column) 7583 if self._match(TokenType.L_PAREN, advance=False) 7584 else None 7585 ) 7586 7587 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7588 7589 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7590 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7591 principal = self._parse_id_var() 7592 7593 if not principal: 7594 return None 7595 7596 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7597 7598 def _parse_grant(self) -> exp.Grant | exp.Command: 7599 start = self._prev 7600 7601 privileges = self._parse_csv(self._parse_grant_privilege) 7602 7603 self._match(TokenType.ON) 7604 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7605 7606 # Attempt to parse the securable e.g. MySQL allows names 7607 # such as "foo.*", "*.*" which are not easily parseable yet 7608 securable = self._try_parse(self._parse_table_parts) 7609 7610 if not securable or not self._match_text_seq("TO"): 7611 return self._parse_as_command(start) 7612 7613 principals = self._parse_csv(self._parse_grant_principal) 7614 7615 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7616 7617 if self._curr: 7618 return self._parse_as_command(start) 7619 7620 return self.expression( 7621 exp.Grant, 7622 privileges=privileges, 7623 kind=kind, 7624 securable=securable, 7625 principals=principals, 7626 grant_option=grant_option, 7627 ) 7628 7629 def _parse_overlay(self) -> exp.Overlay: 7630 return self.expression( 7631 exp.Overlay, 7632 **{ # type: ignore 7633 "this": self._parse_bitwise(), 7634 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7635 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7636 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7637 }, 7638 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1394 def __init__( 1395 self, 1396 error_level: t.Optional[ErrorLevel] = None, 1397 error_message_context: int = 100, 1398 max_errors: int = 3, 1399 dialect: DialectType = None, 1400 ): 1401 from sqlglot.dialects import Dialect 1402 1403 self.error_level = error_level or ErrorLevel.IMMEDIATE 1404 self.error_message_context = error_message_context 1405 self.max_errors = max_errors 1406 self.dialect = Dialect.get_or_raise(dialect) 1407 self.reset()
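A Parser is usually created indirectly through sqlglot.parse_one or a Dialect, but it can also be constructed directly. A usage sketch, relying on the string shorthand accepted by Dialect.get_or_raise:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to 5 errors and raise them together once parsing finishes,
    # with 50 characters of query context around each offending token.
    parser = Parser(
        error_level=ErrorLevel.RAISE,
        error_message_context=50,
        max_errors=5,
        dialect="duckdb",
    )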
1419 def parse( 1420 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1421 ) -> t.List[t.Optional[exp.Expression]]: 1422 """ 1423 Parses a list of tokens and returns a list of syntax trees, one tree 1424 per parsed SQL statement. 1425 1426 Args: 1427 raw_tokens: The list of tokens. 1428 sql: The original SQL string, used to produce helpful debug messages. 1429 1430 Returns: 1431 The list of the produced syntax trees. 1432 """ 1433 return self._parse( 1434 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1435 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
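A minimal round trip through the Tokenizer, showing that parse produces one syntax tree per statement:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)

    # Passing the original SQL string lets the parser attach query context
    # to any error messages it produces.
    trees = Parser().parse(tokens, sql)
    assert len(trees) == 2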
1437 def parse_into( 1438 self, 1439 expression_types: exp.IntoType, 1440 raw_tokens: t.List[Token], 1441 sql: t.Optional[str] = None, 1442 ) -> t.List[t.Optional[exp.Expression]]: 1443 """ 1444 Parses a list of tokens into a given Expression type. If a collection of Expression 1445 types is given instead, this method will try to parse the token list into each one 1446 of them, stopping at the first for which the parsing succeeds. 1447 1448 Args: 1449 expression_types: The expression type(s) to try and parse the token list into. 1450 raw_tokens: The list of tokens. 1451 sql: The original SQL string, used to produce helpful debug messages. 1452 1453 Returns: 1454 The target Expression. 1455 """ 1456 errors = [] 1457 for expression_type in ensure_list(expression_types): 1458 parser = self.EXPRESSION_PARSERS.get(expression_type) 1459 if not parser: 1460 raise TypeError(f"No parser registered for {expression_type}") 1461 1462 try: 1463 return self._parse(parser, raw_tokens, sql) 1464 except ParseError as e: 1465 e.errors[0]["into_expression"] = expression_type 1466 errors.append(e) 1467 1468 raise ParseError( 1469 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1470 errors=merge_errors(errors), 1471 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
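A sketch of the happy path, assuming exp.Condition has a registered entry in EXPRESSION_PARSERS (it does in current sqlglot); in application code this is usually reached via parse_one(..., into=...):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "x > 1 AND y IS NULL"
    tokens = Tokenizer().tokenize(sql)

    # Each candidate type is tried in order; if all fail, the raised
    # ParseError records an "into_expression" key for every attempt.
    (condition,) = Parser().parse_into(exp.Condition, tokens, sql)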
1511 def check_errors(self) -> None: 1512 """Logs or raises any found errors, depending on the chosen error level setting.""" 1513 if self.error_level == ErrorLevel.WARN: 1514 for error in self.errors: 1515 logger.error(str(error)) 1516 elif self.error_level == ErrorLevel.RAISE and self.errors: 1517 raise ParseError( 1518 concat_messages(self.errors, self.max_errors), 1519 errors=merge_errors(self.errors), 1520 )
Logs or raises any found errors, depending on the chosen error level setting.
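check_errors runs at the end of parsing, so the error level chosen at construction time determines the outcome. A sketch using an intentionally malformed query:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT foo( FROM bar"  # unbalanced parenthesis

    # With WARN, recorded errors are logged rather than raised; with RAISE,
    # a single ParseError aggregating up to max_errors messages is thrown.
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)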
1522 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1523 """ 1524 Appends an error in the list of recorded errors or raises it, depending on the chosen 1525 error level setting. 1526 """ 1527 token = token or self._curr or self._prev or Token.string("") 1528 start = token.start 1529 end = token.end + 1 1530 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1531 highlight = self.sql[start:end] 1532 end_context = self.sql[end : end + self.error_message_context] 1533 1534 error = ParseError.new( 1535 f"{message}. Line {token.line}, Col: {token.col}.\n" 1536 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1537 description=message, 1538 line=token.line, 1539 col=token.col, 1540 start_context=start_context, 1541 highlight=highlight, 1542 end_context=end_context, 1543 ) 1544 1545 if self.error_level == ErrorLevel.IMMEDIATE: 1546 raise error 1547 1548 self.errors.append(error)
Appends an error to the list of recorded errors, or raises it immediately, depending on the chosen error level setting.
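The structured fields assembled here survive on the raised exception, which makes programmatic error reporting straightforward:

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.transpile("SELECT foo( FROM bar")
    except ParseError as e:
        # Each entry mirrors the keys built in raise_error above.
        err = e.errors[0]
        print(err["line"], err["col"], err["highlight"])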
1550 def expression( 1551 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1552 ) -> E: 1553 """ 1554 Creates a new, validated Expression. 1555 1556 Args: 1557 exp_class: The expression class to instantiate. 1558 comments: An optional list of comments to attach to the expression. 1559 kwargs: The arguments to set for the expression along with their respective values. 1560 1561 Returns: 1562 The target expression. 1563 """ 1564 instance = exp_class(**kwargs) 1565 instance.add_comments(comments) if comments else self._add_comments(instance) 1566 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
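This factory is used by effectively every _parse_* method above. A hypothetical subclass helper sketch showing the pattern; the method name is illustrative and not part of sqlglot:

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_parenthesized(self):
            # Hypothetical helper: consumes "( <expr> )" and returns a
            # validated exp.Paren with any pending comments attached.
            self._match_l_paren()
            this = self._parse_assignment()
            self._match_r_paren()
            return self.expression(exp.Paren, this=this)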
1573 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1574 """ 1575 Validates an Expression, making sure that all its mandatory arguments are set. 1576 1577 Args: 1578 expression: The expression to validate. 1579 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1580 1581 Returns: 1582 The validated expression. 1583 """ 1584 if self.error_level != ErrorLevel.IGNORE: 1585 for error_message in expression.error_messages(args): 1586 self.raise_error(error_message) 1587 1588 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
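A sketch of the failure mode: exp.Lower declares its "this" argument as mandatory, so validating an empty instance reports it (the exact message wording may differ between versions):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()  # defaults to ErrorLevel.IMMEDIATE

    # Raises ParseError, roughly:
    # "Required keyword: 'this' missing for <class 'sqlglot.expressions.Lower'>"
    parser.validate_expression(exp.Lower())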