sqlglot.parser
"""Helpers and scaffolding for sqlglot's SQL parser.

This prologue defines the module-level "builder" functions that turn raw
argument lists (produced while parsing function calls) into AST nodes from
:mod:`sqlglot.expressions`, plus the metaclass that precomputes lookup tries
for the ``Parser`` class declared at the bottom.
"""

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    # Generic placeholder used only in annotations.
    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

# Shape of the option tables consumed by `_parse_var_from_options`-style
# helpers: option name -> sequence of follow-up keywords (or keyword tuples).
OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP node from an interleaved ``[k1, v1, k2, v2, ...]`` list.

    A single star argument (e.g. ``MAP(*)``) produces a ``StarMap`` instead.
    An odd-length list raises ``IndexError`` on the trailing key.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE node; note the operand order is swapped vs. the arg list
    (args[1] becomes the subject, args[0] the pattern). A third argument is
    wrapped as the ESCAPE character.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that parses ``<this> <op> <bitwise expr>``
    into *expr_type*, optionally swapping the operands, and then handles a
    trailing ESCAPE clause.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build LOG(...) honoring the dialect's argument order and 1-arg default
    (``Ln`` vs ``Log``, per ``LOG_DEFAULTS_TO_LN``).
    """
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build HEX(..), choosing the lowercase variant per ``HEX_LOWERCASE``."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON-extraction functions whose second argument is
    converted to the dialect's JSON path representation. Extra arguments are
    preserved only for ``exp.JSONExtract``.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    """Build MOD(x, y), parenthesizing binary operands so operator precedence
    survives transpilation to the ``%`` form.
    """
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True) -> exp.Pad:
    """Build LPAD/RPAD; *is_left* selects the side, args are
    (subject, length, fill_pattern).
    """
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-constructor node, recording whether bracket (``[...]``)
    notation was used for dialects that distinguish the two syntaxes.
    """
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE. The two-argument form is interpreted as
    (target_tz, timestamp), with *default_source_tz* filling in the source
    zone when provided; otherwise fall back to the generic arg-list builder.
    """
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True) -> exp.Trim:
    """Build LTRIM/RTRIM as a TRIM node with LEADING/TRAILING position."""
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    """Build COALESCE/IFNULL/NVL; *is_nvl* marks the NVL spelling so it can be
    round-tripped.
    """
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET keyword tries for each Parser
    subclass from its ``SHOW_PARSERS``/``SET_PARSERS`` tables.
    """

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
166 """ 167 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 168 169 Args: 170 error_level: The desired error level. 171 Default: ErrorLevel.IMMEDIATE 172 error_message_context: The amount of context to capture from a query string when displaying 173 the error message (in number of characters). 174 Default: 100 175 max_errors: Maximum number of error messages to include in a raised ParseError. 176 This is only relevant if error_level is ErrorLevel.RAISE. 177 Default: 3 178 """ 179 180 FUNCTIONS: t.Dict[str, t.Callable] = { 181 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 182 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 183 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 184 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 185 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 186 ), 187 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 188 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 189 ), 190 "CHAR": lambda args: exp.Chr(expressions=args), 191 "CHR": lambda args: exp.Chr(expressions=args), 192 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 193 "CONCAT": lambda args, dialect: exp.Concat( 194 expressions=args, 195 safe=not dialect.STRICT_STRING_CONCAT, 196 coalesce=dialect.CONCAT_COALESCE, 197 ), 198 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 199 expressions=args, 200 safe=not dialect.STRICT_STRING_CONCAT, 201 coalesce=dialect.CONCAT_COALESCE, 202 ), 203 "CONVERT_TIMEZONE": build_convert_timezone, 204 "DATE_TO_DATE_STR": lambda args: exp.Cast( 205 this=seq_get(args, 0), 206 to=exp.DataType(this=exp.DataType.Type.TEXT), 207 ), 208 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 209 start=seq_get(args, 0), 210 end=seq_get(args, 1), 211 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 212 
), 213 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 214 "HEX": build_hex, 215 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 
TokenType.CURRENT_USER: exp.CurrentUser, 258 } 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 TokenType.UNION, 265 } 266 267 NESTED_TYPE_TOKENS = { 268 TokenType.ARRAY, 269 TokenType.LIST, 270 TokenType.LOWCARDINALITY, 271 TokenType.MAP, 272 TokenType.NULLABLE, 273 TokenType.RANGE, 274 *STRUCT_TYPE_TOKENS, 275 } 276 277 ENUM_TYPE_TOKENS = { 278 TokenType.ENUM, 279 TokenType.ENUM8, 280 TokenType.ENUM16, 281 } 282 283 AGGREGATE_TYPE_TOKENS = { 284 TokenType.AGGREGATEFUNCTION, 285 TokenType.SIMPLEAGGREGATEFUNCTION, 286 } 287 288 TYPE_TOKENS = { 289 TokenType.BIT, 290 TokenType.BOOLEAN, 291 TokenType.TINYINT, 292 TokenType.UTINYINT, 293 TokenType.SMALLINT, 294 TokenType.USMALLINT, 295 TokenType.INT, 296 TokenType.UINT, 297 TokenType.BIGINT, 298 TokenType.UBIGINT, 299 TokenType.INT128, 300 TokenType.UINT128, 301 TokenType.INT256, 302 TokenType.UINT256, 303 TokenType.MEDIUMINT, 304 TokenType.UMEDIUMINT, 305 TokenType.FIXEDSTRING, 306 TokenType.FLOAT, 307 TokenType.DOUBLE, 308 TokenType.CHAR, 309 TokenType.NCHAR, 310 TokenType.VARCHAR, 311 TokenType.NVARCHAR, 312 TokenType.BPCHAR, 313 TokenType.TEXT, 314 TokenType.MEDIUMTEXT, 315 TokenType.LONGTEXT, 316 TokenType.MEDIUMBLOB, 317 TokenType.LONGBLOB, 318 TokenType.BINARY, 319 TokenType.VARBINARY, 320 TokenType.JSON, 321 TokenType.JSONB, 322 TokenType.INTERVAL, 323 TokenType.TINYBLOB, 324 TokenType.TINYTEXT, 325 TokenType.TIME, 326 TokenType.TIMETZ, 327 TokenType.TIMESTAMP, 328 TokenType.TIMESTAMP_S, 329 TokenType.TIMESTAMP_MS, 330 TokenType.TIMESTAMP_NS, 331 TokenType.TIMESTAMPTZ, 332 TokenType.TIMESTAMPLTZ, 333 TokenType.TIMESTAMPNTZ, 334 TokenType.DATETIME, 335 TokenType.DATETIME64, 336 TokenType.DATE, 337 TokenType.DATE32, 338 TokenType.INT4RANGE, 339 TokenType.INT4MULTIRANGE, 340 TokenType.INT8RANGE, 341 TokenType.INT8MULTIRANGE, 342 TokenType.NUMRANGE, 343 TokenType.NUMMULTIRANGE, 344 TokenType.TSRANGE, 345 TokenType.TSMULTIRANGE, 346 TokenType.TSTZRANGE, 347 
TokenType.TSTZMULTIRANGE, 348 TokenType.DATERANGE, 349 TokenType.DATEMULTIRANGE, 350 TokenType.DECIMAL, 351 TokenType.DECIMAL32, 352 TokenType.DECIMAL64, 353 TokenType.DECIMAL128, 354 TokenType.DECIMAL256, 355 TokenType.UDECIMAL, 356 TokenType.BIGDECIMAL, 357 TokenType.UUID, 358 TokenType.GEOGRAPHY, 359 TokenType.GEOMETRY, 360 TokenType.POINT, 361 TokenType.RING, 362 TokenType.LINESTRING, 363 TokenType.MULTILINESTRING, 364 TokenType.POLYGON, 365 TokenType.MULTIPOLYGON, 366 TokenType.HLLSKETCH, 367 TokenType.HSTORE, 368 TokenType.PSEUDO_TYPE, 369 TokenType.SUPER, 370 TokenType.SERIAL, 371 TokenType.SMALLSERIAL, 372 TokenType.BIGSERIAL, 373 TokenType.XML, 374 TokenType.YEAR, 375 TokenType.UNIQUEIDENTIFIER, 376 TokenType.USERDEFINED, 377 TokenType.MONEY, 378 TokenType.SMALLMONEY, 379 TokenType.ROWVERSION, 380 TokenType.IMAGE, 381 TokenType.VARIANT, 382 TokenType.VECTOR, 383 TokenType.OBJECT, 384 TokenType.OBJECT_IDENTIFIER, 385 TokenType.INET, 386 TokenType.IPADDRESS, 387 TokenType.IPPREFIX, 388 TokenType.IPV4, 389 TokenType.IPV6, 390 TokenType.UNKNOWN, 391 TokenType.NULL, 392 TokenType.NAME, 393 TokenType.TDIGEST, 394 *ENUM_TYPE_TOKENS, 395 *NESTED_TYPE_TOKENS, 396 *AGGREGATE_TYPE_TOKENS, 397 } 398 399 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 400 TokenType.BIGINT: TokenType.UBIGINT, 401 TokenType.INT: TokenType.UINT, 402 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 403 TokenType.SMALLINT: TokenType.USMALLINT, 404 TokenType.TINYINT: TokenType.UTINYINT, 405 TokenType.DECIMAL: TokenType.UDECIMAL, 406 } 407 408 SUBQUERY_PREDICATES = { 409 TokenType.ANY: exp.Any, 410 TokenType.ALL: exp.All, 411 TokenType.EXISTS: exp.Exists, 412 TokenType.SOME: exp.Any, 413 } 414 415 RESERVED_TOKENS = { 416 *Tokenizer.SINGLE_TOKENS.values(), 417 TokenType.SELECT, 418 } - {TokenType.IDENTIFIER} 419 420 DB_CREATABLES = { 421 TokenType.DATABASE, 422 TokenType.DICTIONARY, 423 TokenType.MODEL, 424 TokenType.SCHEMA, 425 TokenType.SEQUENCE, 426 TokenType.STORAGE_INTEGRATION, 427 TokenType.TABLE, 428 
TokenType.TAG, 429 TokenType.VIEW, 430 TokenType.WAREHOUSE, 431 TokenType.STREAMLIT, 432 TokenType.SINK, 433 TokenType.SOURCE, 434 } 435 436 CREATABLES = { 437 TokenType.COLUMN, 438 TokenType.CONSTRAINT, 439 TokenType.FOREIGN_KEY, 440 TokenType.FUNCTION, 441 TokenType.INDEX, 442 TokenType.PROCEDURE, 443 *DB_CREATABLES, 444 } 445 446 ALTERABLES = { 447 TokenType.INDEX, 448 TokenType.TABLE, 449 TokenType.VIEW, 450 } 451 452 # Tokens that can represent identifiers 453 ID_VAR_TOKENS = { 454 TokenType.ALL, 455 TokenType.ATTACH, 456 TokenType.VAR, 457 TokenType.ANTI, 458 TokenType.APPLY, 459 TokenType.ASC, 460 TokenType.ASOF, 461 TokenType.AUTO_INCREMENT, 462 TokenType.BEGIN, 463 TokenType.BPCHAR, 464 TokenType.CACHE, 465 TokenType.CASE, 466 TokenType.COLLATE, 467 TokenType.COMMAND, 468 TokenType.COMMENT, 469 TokenType.COMMIT, 470 TokenType.CONSTRAINT, 471 TokenType.COPY, 472 TokenType.CUBE, 473 TokenType.DEFAULT, 474 TokenType.DELETE, 475 TokenType.DESC, 476 TokenType.DESCRIBE, 477 TokenType.DETACH, 478 TokenType.DICTIONARY, 479 TokenType.DIV, 480 TokenType.END, 481 TokenType.EXECUTE, 482 TokenType.ESCAPE, 483 TokenType.FALSE, 484 TokenType.FIRST, 485 TokenType.FILTER, 486 TokenType.FINAL, 487 TokenType.FORMAT, 488 TokenType.FULL, 489 TokenType.IDENTIFIER, 490 TokenType.IS, 491 TokenType.ISNULL, 492 TokenType.INTERVAL, 493 TokenType.KEEP, 494 TokenType.KILL, 495 TokenType.LEFT, 496 TokenType.LOAD, 497 TokenType.MERGE, 498 TokenType.NATURAL, 499 TokenType.NEXT, 500 TokenType.OFFSET, 501 TokenType.OPERATOR, 502 TokenType.ORDINALITY, 503 TokenType.OVERLAPS, 504 TokenType.OVERWRITE, 505 TokenType.PARTITION, 506 TokenType.PERCENT, 507 TokenType.PIVOT, 508 TokenType.PRAGMA, 509 TokenType.RANGE, 510 TokenType.RECURSIVE, 511 TokenType.REFERENCES, 512 TokenType.REFRESH, 513 TokenType.RENAME, 514 TokenType.REPLACE, 515 TokenType.RIGHT, 516 TokenType.ROLLUP, 517 TokenType.ROW, 518 TokenType.ROWS, 519 TokenType.SEMI, 520 TokenType.SET, 521 TokenType.SETTINGS, 522 TokenType.SHOW, 
523 TokenType.TEMPORARY, 524 TokenType.TOP, 525 TokenType.TRUE, 526 TokenType.TRUNCATE, 527 TokenType.UNIQUE, 528 TokenType.UNNEST, 529 TokenType.UNPIVOT, 530 TokenType.UPDATE, 531 TokenType.USE, 532 TokenType.VOLATILE, 533 TokenType.WINDOW, 534 *CREATABLES, 535 *SUBQUERY_PREDICATES, 536 *TYPE_TOKENS, 537 *NO_PAREN_FUNCTIONS, 538 } 539 ID_VAR_TOKENS.remove(TokenType.UNION) 540 541 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 542 543 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 544 TokenType.ANTI, 545 TokenType.APPLY, 546 TokenType.ASOF, 547 TokenType.FULL, 548 TokenType.LEFT, 549 TokenType.LOCK, 550 TokenType.NATURAL, 551 TokenType.OFFSET, 552 TokenType.RIGHT, 553 TokenType.SEMI, 554 TokenType.WINDOW, 555 } 556 557 ALIAS_TOKENS = ID_VAR_TOKENS 558 559 ARRAY_CONSTRUCTORS = { 560 "ARRAY": exp.Array, 561 "LIST": exp.List, 562 } 563 564 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 565 566 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 567 568 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 569 570 FUNC_TOKENS = { 571 TokenType.COLLATE, 572 TokenType.COMMAND, 573 TokenType.CURRENT_DATE, 574 TokenType.CURRENT_DATETIME, 575 TokenType.CURRENT_TIMESTAMP, 576 TokenType.CURRENT_TIME, 577 TokenType.CURRENT_USER, 578 TokenType.FILTER, 579 TokenType.FIRST, 580 TokenType.FORMAT, 581 TokenType.GLOB, 582 TokenType.IDENTIFIER, 583 TokenType.INDEX, 584 TokenType.ISNULL, 585 TokenType.ILIKE, 586 TokenType.INSERT, 587 TokenType.LIKE, 588 TokenType.MERGE, 589 TokenType.OFFSET, 590 TokenType.PRIMARY_KEY, 591 TokenType.RANGE, 592 TokenType.REPLACE, 593 TokenType.RLIKE, 594 TokenType.ROW, 595 TokenType.UNNEST, 596 TokenType.VAR, 597 TokenType.LEFT, 598 TokenType.RIGHT, 599 TokenType.SEQUENCE, 600 TokenType.DATE, 601 TokenType.DATETIME, 602 TokenType.TABLE, 603 TokenType.TIMESTAMP, 604 TokenType.TIMESTAMPTZ, 605 TokenType.TRUNCATE, 606 TokenType.WINDOW, 607 TokenType.XOR, 608 *TYPE_TOKENS, 609 *SUBQUERY_PREDICATES, 610 } 611 612 CONJUNCTION: t.Dict[TokenType, 
t.Type[exp.Expression]] = { 613 TokenType.AND: exp.And, 614 } 615 616 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 617 TokenType.COLON_EQ: exp.PropertyEQ, 618 } 619 620 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 621 TokenType.OR: exp.Or, 622 } 623 624 EQUALITY = { 625 TokenType.EQ: exp.EQ, 626 TokenType.NEQ: exp.NEQ, 627 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 628 } 629 630 COMPARISON = { 631 TokenType.GT: exp.GT, 632 TokenType.GTE: exp.GTE, 633 TokenType.LT: exp.LT, 634 TokenType.LTE: exp.LTE, 635 } 636 637 BITWISE = { 638 TokenType.AMP: exp.BitwiseAnd, 639 TokenType.CARET: exp.BitwiseXor, 640 TokenType.PIPE: exp.BitwiseOr, 641 } 642 643 TERM = { 644 TokenType.DASH: exp.Sub, 645 TokenType.PLUS: exp.Add, 646 TokenType.MOD: exp.Mod, 647 TokenType.COLLATE: exp.Collate, 648 } 649 650 FACTOR = { 651 TokenType.DIV: exp.IntDiv, 652 TokenType.LR_ARROW: exp.Distance, 653 TokenType.SLASH: exp.Div, 654 TokenType.STAR: exp.Mul, 655 } 656 657 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 658 659 TIMES = { 660 TokenType.TIME, 661 TokenType.TIMETZ, 662 } 663 664 TIMESTAMPS = { 665 TokenType.TIMESTAMP, 666 TokenType.TIMESTAMPTZ, 667 TokenType.TIMESTAMPLTZ, 668 *TIMES, 669 } 670 671 SET_OPERATIONS = { 672 TokenType.UNION, 673 TokenType.INTERSECT, 674 TokenType.EXCEPT, 675 } 676 677 JOIN_METHODS = { 678 TokenType.ASOF, 679 TokenType.NATURAL, 680 TokenType.POSITIONAL, 681 } 682 683 JOIN_SIDES = { 684 TokenType.LEFT, 685 TokenType.RIGHT, 686 TokenType.FULL, 687 } 688 689 JOIN_KINDS = { 690 TokenType.ANTI, 691 TokenType.CROSS, 692 TokenType.INNER, 693 TokenType.OUTER, 694 TokenType.SEMI, 695 TokenType.STRAIGHT_JOIN, 696 } 697 698 JOIN_HINTS: t.Set[str] = set() 699 700 LAMBDAS = { 701 TokenType.ARROW: lambda self, expressions: self.expression( 702 exp.Lambda, 703 this=self._replace_lambda( 704 self._parse_assignment(), 705 expressions, 706 ), 707 expressions=expressions, 708 ), 709 TokenType.FARROW: lambda self, expressions: self.expression( 710 
exp.Kwarg, 711 this=exp.var(expressions[0].name), 712 expression=self._parse_assignment(), 713 ), 714 } 715 716 COLUMN_OPERATORS = { 717 TokenType.DOT: None, 718 TokenType.DCOLON: lambda self, this, to: self.expression( 719 exp.Cast if self.STRICT_CAST else exp.TryCast, 720 this=this, 721 to=to, 722 ), 723 TokenType.ARROW: lambda self, this, path: self.expression( 724 exp.JSONExtract, 725 this=this, 726 expression=self.dialect.to_json_path(path), 727 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 728 ), 729 TokenType.DARROW: lambda self, this, path: self.expression( 730 exp.JSONExtractScalar, 731 this=this, 732 expression=self.dialect.to_json_path(path), 733 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 734 ), 735 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 736 exp.JSONBExtract, 737 this=this, 738 expression=path, 739 ), 740 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 741 exp.JSONBExtractScalar, 742 this=this, 743 expression=path, 744 ), 745 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 746 exp.JSONBContains, 747 this=this, 748 expression=key, 749 ), 750 } 751 752 EXPRESSION_PARSERS = { 753 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 754 exp.Column: lambda self: self._parse_column(), 755 exp.Condition: lambda self: self._parse_assignment(), 756 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 757 exp.Expression: lambda self: self._parse_expression(), 758 exp.From: lambda self: self._parse_from(joins=True), 759 exp.Group: lambda self: self._parse_group(), 760 exp.Having: lambda self: self._parse_having(), 761 exp.Hint: lambda self: self._parse_hint_body(), 762 exp.Identifier: lambda self: self._parse_id_var(), 763 exp.Join: lambda self: self._parse_join(), 764 exp.Lambda: lambda self: self._parse_lambda(), 765 exp.Lateral: lambda self: self._parse_lateral(), 766 exp.Limit: lambda self: self._parse_limit(), 767 exp.Offset: lambda 
self: self._parse_offset(), 768 exp.Order: lambda self: self._parse_order(), 769 exp.Ordered: lambda self: self._parse_ordered(), 770 exp.Properties: lambda self: self._parse_properties(), 771 exp.Qualify: lambda self: self._parse_qualify(), 772 exp.Returning: lambda self: self._parse_returning(), 773 exp.Select: lambda self: self._parse_select(), 774 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 775 exp.Table: lambda self: self._parse_table_parts(), 776 exp.TableAlias: lambda self: self._parse_table_alias(), 777 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 778 exp.Where: lambda self: self._parse_where(), 779 exp.Window: lambda self: self._parse_named_window(), 780 exp.With: lambda self: self._parse_with(), 781 "JOIN_TYPE": lambda self: self._parse_join_parts(), 782 } 783 784 STATEMENT_PARSERS = { 785 TokenType.ALTER: lambda self: self._parse_alter(), 786 TokenType.BEGIN: lambda self: self._parse_transaction(), 787 TokenType.CACHE: lambda self: self._parse_cache(), 788 TokenType.COMMENT: lambda self: self._parse_comment(), 789 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 790 TokenType.COPY: lambda self: self._parse_copy(), 791 TokenType.CREATE: lambda self: self._parse_create(), 792 TokenType.DELETE: lambda self: self._parse_delete(), 793 TokenType.DESC: lambda self: self._parse_describe(), 794 TokenType.DESCRIBE: lambda self: self._parse_describe(), 795 TokenType.DROP: lambda self: self._parse_drop(), 796 TokenType.GRANT: lambda self: self._parse_grant(), 797 TokenType.INSERT: lambda self: self._parse_insert(), 798 TokenType.KILL: lambda self: self._parse_kill(), 799 TokenType.LOAD: lambda self: self._parse_load(), 800 TokenType.MERGE: lambda self: self._parse_merge(), 801 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 802 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 803 TokenType.REFRESH: lambda self: self._parse_refresh(), 804 
TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 805 TokenType.SET: lambda self: self._parse_set(), 806 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 807 TokenType.UNCACHE: lambda self: self._parse_uncache(), 808 TokenType.UPDATE: lambda self: self._parse_update(), 809 TokenType.USE: lambda self: self.expression( 810 exp.Use, 811 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 812 this=self._parse_table(schema=False), 813 ), 814 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 815 } 816 817 UNARY_PARSERS = { 818 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 819 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 820 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 821 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 822 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 823 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 824 } 825 826 STRING_PARSERS = { 827 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 828 exp.RawString, this=token.text 829 ), 830 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 831 exp.National, this=token.text 832 ), 833 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 834 TokenType.STRING: lambda self, token: self.expression( 835 exp.Literal, this=token.text, is_string=True 836 ), 837 TokenType.UNICODE_STRING: lambda self, token: self.expression( 838 exp.UnicodeString, 839 this=token.text, 840 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 841 ), 842 } 843 844 NUMERIC_PARSERS = { 845 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 846 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 847 
TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 848 TokenType.NUMBER: lambda self, token: self.expression( 849 exp.Literal, this=token.text, is_string=False 850 ), 851 } 852 853 PRIMARY_PARSERS = { 854 **STRING_PARSERS, 855 **NUMERIC_PARSERS, 856 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 857 TokenType.NULL: lambda self, _: self.expression(exp.Null), 858 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 859 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 860 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 861 TokenType.STAR: lambda self, _: self._parse_star_ops(), 862 } 863 864 PLACEHOLDER_PARSERS = { 865 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 866 TokenType.PARAMETER: lambda self: self._parse_parameter(), 867 TokenType.COLON: lambda self: ( 868 self.expression(exp.Placeholder, this=self._prev.text) 869 if self._match_set(self.ID_VAR_TOKENS) 870 else None 871 ), 872 } 873 874 RANGE_PARSERS = { 875 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 876 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 877 TokenType.GLOB: binary_range_parser(exp.Glob), 878 TokenType.ILIKE: binary_range_parser(exp.ILike), 879 TokenType.IN: lambda self, this: self._parse_in(this), 880 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 881 TokenType.IS: lambda self, this: self._parse_is(this), 882 TokenType.LIKE: binary_range_parser(exp.Like), 883 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 884 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 885 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 886 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 887 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 888 } 889 890 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 891 "ALLOWED_VALUES": lambda self: 
self.expression( 892 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 893 ), 894 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 895 "AUTO": lambda self: self._parse_auto_property(), 896 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 897 "BACKUP": lambda self: self.expression( 898 exp.BackupProperty, this=self._parse_var(any_token=True) 899 ), 900 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 901 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 902 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 903 "CHECKSUM": lambda self: self._parse_checksum(), 904 "CLUSTER BY": lambda self: self._parse_cluster(), 905 "CLUSTERED": lambda self: self._parse_clustered_by(), 906 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 907 exp.CollateProperty, **kwargs 908 ), 909 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 910 "CONTAINS": lambda self: self._parse_contains_property(), 911 "COPY": lambda self: self._parse_copy_property(), 912 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 913 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 914 "DEFINER": lambda self: self._parse_definer(), 915 "DETERMINISTIC": lambda self: self.expression( 916 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 917 ), 918 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 919 "DUPLICATE": lambda self: self._parse_duplicate(), 920 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 921 "DISTKEY": lambda self: self._parse_distkey(), 922 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 923 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 924 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 925 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 926 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 927 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 928 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 929 "FREESPACE": lambda self: self._parse_freespace(), 930 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 931 "HEAP": lambda self: self.expression(exp.HeapProperty), 932 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 933 "IMMUTABLE": lambda self: self.expression( 934 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 935 ), 936 "INHERITS": lambda self: self.expression( 937 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 938 ), 939 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 940 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 941 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 942 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 943 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 944 "LIKE": lambda self: self._parse_create_like(), 945 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 946 "LOCK": lambda self: self._parse_locking(), 947 "LOCKING": lambda self: self._parse_locking(), 948 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 949 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 950 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 951 "MODIFIES": lambda self: self._parse_modifies_property(), 952 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 953 "NO": lambda self: self._parse_no_property(), 954 "ON": lambda self: self._parse_on_property(), 955 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 956 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, 
this=self._parse_schema()), 957 "PARTITION": lambda self: self._parse_partitioned_of(), 958 "PARTITION BY": lambda self: self._parse_partitioned_by(), 959 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 960 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 961 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 962 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 963 "READS": lambda self: self._parse_reads_property(), 964 "REMOTE": lambda self: self._parse_remote_with_connection(), 965 "RETURNS": lambda self: self._parse_returns(), 966 "STRICT": lambda self: self.expression(exp.StrictProperty), 967 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 968 "ROW": lambda self: self._parse_row(), 969 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 970 "SAMPLE": lambda self: self.expression( 971 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 972 ), 973 "SECURE": lambda self: self.expression(exp.SecureProperty), 974 "SECURITY": lambda self: self._parse_security(), 975 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 976 "SETTINGS": lambda self: self._parse_settings_property(), 977 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 978 "SORTKEY": lambda self: self._parse_sortkey(), 979 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 980 "STABLE": lambda self: self.expression( 981 exp.StabilityProperty, this=exp.Literal.string("STABLE") 982 ), 983 "STORED": lambda self: self._parse_stored(), 984 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 985 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 986 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 987 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 988 "TO": lambda self: self._parse_to_table(), 989 "TRANSIENT": lambda self: 
self.expression(exp.TransientProperty), 990 "TRANSFORM": lambda self: self.expression( 991 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 992 ), 993 "TTL": lambda self: self._parse_ttl(), 994 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 995 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 996 "VOLATILE": lambda self: self._parse_volatile_property(), 997 "WITH": lambda self: self._parse_with_property(), 998 } 999 1000 CONSTRAINT_PARSERS = { 1001 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1002 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1003 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1004 "CHARACTER SET": lambda self: self.expression( 1005 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1006 ), 1007 "CHECK": lambda self: self.expression( 1008 exp.CheckColumnConstraint, 1009 this=self._parse_wrapped(self._parse_assignment), 1010 enforced=self._match_text_seq("ENFORCED"), 1011 ), 1012 "COLLATE": lambda self: self.expression( 1013 exp.CollateColumnConstraint, 1014 this=self._parse_identifier() or self._parse_column(), 1015 ), 1016 "COMMENT": lambda self: self.expression( 1017 exp.CommentColumnConstraint, this=self._parse_string() 1018 ), 1019 "COMPRESS": lambda self: self._parse_compress(), 1020 "CLUSTERED": lambda self: self.expression( 1021 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1022 ), 1023 "NONCLUSTERED": lambda self: self.expression( 1024 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1025 ), 1026 "DEFAULT": lambda self: self.expression( 1027 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1028 ), 1029 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1030 "EPHEMERAL": lambda self: self.expression( 1031 exp.EphemeralColumnConstraint, 
this=self._parse_bitwise() 1032 ), 1033 "EXCLUDE": lambda self: self.expression( 1034 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1035 ), 1036 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1037 "FORMAT": lambda self: self.expression( 1038 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1039 ), 1040 "GENERATED": lambda self: self._parse_generated_as_identity(), 1041 "IDENTITY": lambda self: self._parse_auto_increment(), 1042 "INLINE": lambda self: self._parse_inline(), 1043 "LIKE": lambda self: self._parse_create_like(), 1044 "NOT": lambda self: self._parse_not_constraint(), 1045 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1046 "ON": lambda self: ( 1047 self._match(TokenType.UPDATE) 1048 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1049 ) 1050 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1051 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1052 "PERIOD": lambda self: self._parse_period_for_system_time(), 1053 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1054 "REFERENCES": lambda self: self._parse_references(match=False), 1055 "TITLE": lambda self: self.expression( 1056 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1057 ), 1058 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1059 "UNIQUE": lambda self: self._parse_unique(), 1060 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1061 "WATERMARK": lambda self: self.expression( 1062 exp.WatermarkColumnConstraint, 1063 this=self._match(TokenType.FOR) and self._parse_column(), 1064 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1065 ), 1066 "WITH": lambda self: self.expression( 1067 exp.Properties, expressions=self._parse_wrapped_properties() 1068 ), 1069 } 1070 1071 ALTER_PARSERS = { 1072 "ADD": lambda self: 
self._parse_alter_table_add(), 1073 "AS": lambda self: self._parse_select(), 1074 "ALTER": lambda self: self._parse_alter_table_alter(), 1075 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1076 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1077 "DROP": lambda self: self._parse_alter_table_drop(), 1078 "RENAME": lambda self: self._parse_alter_table_rename(), 1079 "SET": lambda self: self._parse_alter_table_set(), 1080 "SWAP": lambda self: self.expression( 1081 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1082 ), 1083 } 1084 1085 ALTER_ALTER_PARSERS = { 1086 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1087 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1088 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1089 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1090 } 1091 1092 SCHEMA_UNNAMED_CONSTRAINTS = { 1093 "CHECK", 1094 "EXCLUDE", 1095 "FOREIGN KEY", 1096 "LIKE", 1097 "PERIOD", 1098 "PRIMARY KEY", 1099 "UNIQUE", 1100 "WATERMARK", 1101 } 1102 1103 NO_PAREN_FUNCTION_PARSERS = { 1104 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1105 "CASE": lambda self: self._parse_case(), 1106 "CONNECT_BY_ROOT": lambda self: self.expression( 1107 exp.ConnectByRoot, this=self._parse_column() 1108 ), 1109 "IF": lambda self: self._parse_if(), 1110 "NEXT": lambda self: self._parse_next_value_for(), 1111 } 1112 1113 INVALID_FUNC_NAME_TOKENS = { 1114 TokenType.IDENTIFIER, 1115 TokenType.STRING, 1116 } 1117 1118 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1119 1120 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1121 1122 FUNCTION_PARSERS = { 1123 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1124 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1125 "DECODE": lambda self: self._parse_decode(), 1126 "EXTRACT": lambda self: self._parse_extract(), 1127 "GAP_FILL": lambda self: 
self._parse_gap_fill(), 1128 "JSON_OBJECT": lambda self: self._parse_json_object(), 1129 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1130 "JSON_TABLE": lambda self: self._parse_json_table(), 1131 "MATCH": lambda self: self._parse_match_against(), 1132 "NORMALIZE": lambda self: self._parse_normalize(), 1133 "OPENJSON": lambda self: self._parse_open_json(), 1134 "OVERLAY": lambda self: self._parse_overlay(), 1135 "POSITION": lambda self: self._parse_position(), 1136 "PREDICT": lambda self: self._parse_predict(), 1137 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1138 "STRING_AGG": lambda self: self._parse_string_agg(), 1139 "SUBSTRING": lambda self: self._parse_substring(), 1140 "TRIM": lambda self: self._parse_trim(), 1141 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1142 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1143 } 1144 1145 QUERY_MODIFIER_PARSERS = { 1146 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1147 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1148 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1149 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1150 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1151 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1152 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1153 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1154 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1155 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1156 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1157 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1158 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1159 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1160 TokenType.USING: lambda 
self: ("sample", self._parse_table_sample(as_modifier=True)), 1161 TokenType.CLUSTER_BY: lambda self: ( 1162 "cluster", 1163 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1164 ), 1165 TokenType.DISTRIBUTE_BY: lambda self: ( 1166 "distribute", 1167 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1168 ), 1169 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1170 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1171 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1172 } 1173 1174 SET_PARSERS = { 1175 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1176 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1177 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1178 "TRANSACTION": lambda self: self._parse_set_transaction(), 1179 } 1180 1181 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1182 1183 TYPE_LITERAL_PARSERS = { 1184 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1185 } 1186 1187 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1188 1189 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1190 1191 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1192 1193 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1194 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1195 "ISOLATION": ( 1196 ("LEVEL", "REPEATABLE", "READ"), 1197 ("LEVEL", "READ", "COMMITTED"), 1198 ("LEVEL", "READ", "UNCOMITTED"), 1199 ("LEVEL", "SERIALIZABLE"), 1200 ), 1201 "READ": ("WRITE", "ONLY"), 1202 } 1203 1204 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1205 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1206 ) 1207 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1208 1209 CREATE_SEQUENCE: OPTIONS_TYPE = { 1210 "SCALE": ("EXTEND", "NOEXTEND"), 1211 "SHARD": ("EXTEND", 
"NOEXTEND"), 1212 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1213 **dict.fromkeys( 1214 ( 1215 "SESSION", 1216 "GLOBAL", 1217 "KEEP", 1218 "NOKEEP", 1219 "ORDER", 1220 "NOORDER", 1221 "NOCACHE", 1222 "CYCLE", 1223 "NOCYCLE", 1224 "NOMINVALUE", 1225 "NOMAXVALUE", 1226 "NOSCALE", 1227 "NOSHARD", 1228 ), 1229 tuple(), 1230 ), 1231 } 1232 1233 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1234 1235 USABLES: OPTIONS_TYPE = dict.fromkeys( 1236 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1237 ) 1238 1239 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1240 1241 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1242 "TYPE": ("EVOLUTION",), 1243 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1244 } 1245 1246 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1247 1248 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1249 1250 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1251 "NOT": ("ENFORCED",), 1252 "MATCH": ( 1253 "FULL", 1254 "PARTIAL", 1255 "SIMPLE", 1256 ), 1257 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1258 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1259 } 1260 1261 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1262 1263 CLONE_KEYWORDS = {"CLONE", "COPY"} 1264 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1265 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1266 1267 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1268 1269 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1270 1271 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1272 1273 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1274 1275 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1276 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1277 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1278 1279 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, 
TokenType.IS} 1280 1281 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1282 1283 ADD_CONSTRAINT_TOKENS = { 1284 TokenType.CONSTRAINT, 1285 TokenType.FOREIGN_KEY, 1286 TokenType.INDEX, 1287 TokenType.KEY, 1288 TokenType.PRIMARY_KEY, 1289 TokenType.UNIQUE, 1290 } 1291 1292 DISTINCT_TOKENS = {TokenType.DISTINCT} 1293 1294 NULL_TOKENS = {TokenType.NULL} 1295 1296 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1297 1298 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1299 1300 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1301 1302 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1303 1304 ODBC_DATETIME_LITERALS = { 1305 "d": exp.Date, 1306 "t": exp.Time, 1307 "ts": exp.Timestamp, 1308 } 1309 1310 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1311 1312 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1313 1314 # The style options for the DESCRIBE statement 1315 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1316 1317 OPERATION_MODIFIERS: t.Set[str] = set() 1318 1319 STRICT_CAST = True 1320 1321 PREFIXED_PIVOT_COLUMNS = False 1322 IDENTIFY_PIVOT_STRINGS = False 1323 1324 LOG_DEFAULTS_TO_LN = False 1325 1326 # Whether ADD is present for each column added by ALTER TABLE 1327 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1328 1329 # Whether the table sample clause expects CSV syntax 1330 TABLESAMPLE_CSV = False 1331 1332 # The default method used for table sampling 1333 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1334 1335 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1336 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1337 1338 # Whether the TRIM function expects the characters to trim as its first argument 1339 TRIM_PATTERN_FIRST = False 1340 1341 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1342 STRING_ALIASES = False 1343 1344 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1345 MODIFIERS_ATTACHED_TO_SET_OP = True 1346 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1347 1348 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1349 NO_PAREN_IF_COMMANDS = True 1350 1351 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1352 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1353 1354 # Whether the `:` operator is used to extract a value from a VARIANT column 1355 COLON_IS_VARIANT_EXTRACT = False 1356 1357 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1358 # If this is True and '(' is not found, the keyword will be treated as an identifier 1359 VALUES_FOLLOWED_BY_PAREN = True 1360 1361 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1362 SUPPORTS_IMPLICIT_UNNEST = False 1363 1364 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1365 INTERVAL_SPANS = True 1366 1367 # Whether a PARTITION clause can follow a table reference 1368 SUPPORTS_PARTITION_SELECTION = False 1369 1370 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1371 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1372 1373 __slots__ = ( 1374 "error_level", 1375 "error_message_context", 1376 "max_errors", 1377 "dialect", 1378 "sql", 1379 "errors", 1380 "_tokens", 1381 "_index", 1382 "_curr", 1383 "_next", 1384 "_prev", 1385 "_prev_comments", 1386 ) 1387 1388 # Autofilled 1389 SHOW_TRIE: t.Dict = {} 1390 SET_TRIE: t.Dict = {} 1391 1392 def __init__( 1393 self, 1394 error_level: t.Optional[ErrorLevel] = None, 1395 error_message_context: int = 100, 1396 max_errors: int = 3, 1397 dialect: DialectType = None, 1398 ): 1399 from sqlglot.dialects import Dialect 1400 1401 self.error_level = error_level or ErrorLevel.IMMEDIATE 1402 self.error_message_context = error_message_context 1403 self.max_errors = max_errors 1404 self.dialect = Dialect.get_or_raise(dialect) 1405 self.reset() 1406 1407 def reset(self): 1408 self.sql = "" 1409 self.errors = [] 1410 self._tokens = [] 1411 self._index = 0 1412 self._curr = None 1413 self._next = None 1414 self._prev = None 1415 self._prev_comments = None 1416 1417 def parse( 1418 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1419 ) -> t.List[t.Optional[exp.Expression]]: 1420 """ 1421 Parses a list of tokens and returns a list of syntax trees, one tree 1422 per parsed SQL statement. 1423 1424 Args: 1425 raw_tokens: The list of tokens. 1426 sql: The original SQL string, used to produce helpful debug messages. 1427 1428 Returns: 1429 The list of the produced syntax trees. 
1430 """ 1431 return self._parse( 1432 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1433 ) 1434 1435 def parse_into( 1436 self, 1437 expression_types: exp.IntoType, 1438 raw_tokens: t.List[Token], 1439 sql: t.Optional[str] = None, 1440 ) -> t.List[t.Optional[exp.Expression]]: 1441 """ 1442 Parses a list of tokens into a given Expression type. If a collection of Expression 1443 types is given instead, this method will try to parse the token list into each one 1444 of them, stopping at the first for which the parsing succeeds. 1445 1446 Args: 1447 expression_types: The expression type(s) to try and parse the token list into. 1448 raw_tokens: The list of tokens. 1449 sql: The original SQL string, used to produce helpful debug messages. 1450 1451 Returns: 1452 The target Expression. 1453 """ 1454 errors = [] 1455 for expression_type in ensure_list(expression_types): 1456 parser = self.EXPRESSION_PARSERS.get(expression_type) 1457 if not parser: 1458 raise TypeError(f"No parser registered for {expression_type}") 1459 1460 try: 1461 return self._parse(parser, raw_tokens, sql) 1462 except ParseError as e: 1463 e.errors[0]["into_expression"] = expression_type 1464 errors.append(e) 1465 1466 raise ParseError( 1467 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1468 errors=merge_errors(errors), 1469 ) from errors[-1] 1470 1471 def _parse( 1472 self, 1473 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1474 raw_tokens: t.List[Token], 1475 sql: t.Optional[str] = None, 1476 ) -> t.List[t.Optional[exp.Expression]]: 1477 self.reset() 1478 self.sql = sql or "" 1479 1480 total = len(raw_tokens) 1481 chunks: t.List[t.List[Token]] = [[]] 1482 1483 for i, token in enumerate(raw_tokens): 1484 if token.token_type == TokenType.SEMICOLON: 1485 if token.comments: 1486 chunks.append([token]) 1487 1488 if i < total - 1: 1489 chunks.append([]) 1490 else: 1491 chunks[-1].append(token) 1492 1493 expressions = [] 1494 1495 for 
 tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left over after parse_method means the chunk
            # wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined using ANSI escapes (\033[4m ... \033[0m).
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
1557 kwargs: The arguments to set for the expression along with their respective values. 1558 1559 Returns: 1560 The target expression. 1561 """ 1562 instance = exp_class(**kwargs) 1563 instance.add_comments(comments) if comments else self._add_comments(instance) 1564 return self.validate_expression(instance) 1565 1566 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1567 if expression and self._prev_comments: 1568 expression.add_comments(self._prev_comments) 1569 self._prev_comments = None 1570 1571 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1572 """ 1573 Validates an Expression, making sure that all its mandatory arguments are set. 1574 1575 Args: 1576 expression: The expression to validate. 1577 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1578 1579 Returns: 1580 The validated expression. 1581 """ 1582 if self.error_level != ErrorLevel.IGNORE: 1583 for error_message in expression.error_messages(args): 1584 self.raise_error(error_message) 1585 1586 return expression 1587 1588 def _find_sql(self, start: Token, end: Token) -> str: 1589 return self.sql[start.start : end.end + 1] 1590 1591 def _is_connected(self) -> bool: 1592 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1593 1594 def _advance(self, times: int = 1) -> None: 1595 self._index += times 1596 self._curr = seq_get(self._tokens, self._index) 1597 self._next = seq_get(self._tokens, self._index + 1) 1598 1599 if self._index > 0: 1600 self._prev = self._tokens[self._index - 1] 1601 self._prev_comments = self._prev.comments 1602 else: 1603 self._prev = None 1604 self._prev_comments = None 1605 1606 def _retreat(self, index: int) -> None: 1607 if index != self._index: 1608 self._advance(index - self._index) 1609 1610 def _warn_unsupported(self) -> None: 1611 if len(self._tokens) <= 1: 1612 return 1613 1614 # We use _find_sql because self.sql may comprise multiple chunks, 
 and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback parser: wraps the remaining statement in a generic exp.Command node.
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/except internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any parse failure surfaces as a ParseError here,
        # regardless of the user-configured error level; restored in `finally`.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses a COMMENT statement: optional IF EXISTS, ON, optional MATERIALIZED,
        # a creatable kind, the target, then IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unrecognized target kind: fall back to a raw Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this
= self._parse_id_var() 1671 1672 self._match(TokenType.IS) 1673 1674 return self.expression( 1675 exp.Comment, 1676 this=this, 1677 kind=kind.text, 1678 expression=self._parse_string(), 1679 exists=exists, 1680 materialized=materialized, 1681 ) 1682 1683 def _parse_to_table( 1684 self, 1685 ) -> exp.ToTableProperty: 1686 table = self._parse_table_parts(schema=True) 1687 return self.expression(exp.ToTableProperty, this=table) 1688 1689 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1690 def _parse_ttl(self) -> exp.Expression: 1691 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1692 this = self._parse_bitwise() 1693 1694 if self._match_text_seq("DELETE"): 1695 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1696 if self._match_text_seq("RECOMPRESS"): 1697 return self.expression( 1698 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1699 ) 1700 if self._match_text_seq("TO", "DISK"): 1701 return self.expression( 1702 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1703 ) 1704 if self._match_text_seq("TO", "VOLUME"): 1705 return self.expression( 1706 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1707 ) 1708 1709 return this 1710 1711 expressions = self._parse_csv(_parse_ttl_action) 1712 where = self._parse_where() 1713 group = self._parse_group() 1714 1715 aggregates = None 1716 if group and self._match(TokenType.SET): 1717 aggregates = self._parse_csv(self._parse_set_item) 1718 1719 return self.expression( 1720 exp.MergeTreeTTL, 1721 expressions=expressions, 1722 where=where, 1723 group=group, 1724 aggregates=aggregates, 1725 ) 1726 1727 def _parse_statement(self) -> t.Optional[exp.Expression]: 1728 if self._curr is None: 1729 return None 1730 1731 if self._match_set(self.STATEMENT_PARSERS): 1732 comments = self._prev_comments 1733 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1734 stmt.add_comments(comments, 
prepend=True) 1735 return stmt 1736 1737 if self._match_set(self.dialect.tokenizer.COMMANDS): 1738 return self._parse_command() 1739 1740 expression = self._parse_expression() 1741 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1742 return self._parse_query_modifiers(expression) 1743 1744 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1745 start = self._prev 1746 temporary = self._match(TokenType.TEMPORARY) 1747 materialized = self._match_text_seq("MATERIALIZED") 1748 1749 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1750 if not kind: 1751 return self._parse_as_command(start) 1752 1753 concurrently = self._match_text_seq("CONCURRENTLY") 1754 if_exists = exists or self._parse_exists() 1755 1756 if kind == "COLUMN": 1757 this = self._parse_column() 1758 else: 1759 this = self._parse_table_parts( 1760 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1761 ) 1762 1763 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1764 1765 if self._match(TokenType.L_PAREN, advance=False): 1766 expressions = self._parse_wrapped_csv(self._parse_types) 1767 else: 1768 expressions = None 1769 1770 return self.expression( 1771 exp.Drop, 1772 exists=if_exists, 1773 this=this, 1774 expressions=expressions, 1775 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1776 temporary=temporary, 1777 materialized=materialized, 1778 cascade=self._match_text_seq("CASCADE"), 1779 constraints=self._match_text_seq("CONSTRAINTS"), 1780 purge=self._match_text_seq("PURGE"), 1781 cluster=cluster, 1782 concurrently=concurrently, 1783 ) 1784 1785 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1786 return ( 1787 self._match_text_seq("IF") 1788 and (not not_ or self._match(TokenType.NOT)) 1789 and self._match(TokenType.EXISTS) 1790 ) 1791 1792 def _parse_create(self) -> exp.Create | exp.Command: 1793 # Note: this can't be None because we've matched 
a statement parser 1794 start = self._prev 1795 1796 replace = ( 1797 start.token_type == TokenType.REPLACE 1798 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1799 or self._match_pair(TokenType.OR, TokenType.ALTER) 1800 ) 1801 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1802 1803 unique = self._match(TokenType.UNIQUE) 1804 1805 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1806 clustered = True 1807 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1808 "COLUMNSTORE" 1809 ): 1810 clustered = False 1811 else: 1812 clustered = None 1813 1814 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1815 self._advance() 1816 1817 properties = None 1818 create_token = self._match_set(self.CREATABLES) and self._prev 1819 1820 if not create_token: 1821 # exp.Properties.Location.POST_CREATE 1822 properties = self._parse_properties() 1823 create_token = self._match_set(self.CREATABLES) and self._prev 1824 1825 if not properties or not create_token: 1826 return self._parse_as_command(start) 1827 1828 concurrently = self._match_text_seq("CONCURRENTLY") 1829 exists = self._parse_exists(not_=True) 1830 this = None 1831 expression: t.Optional[exp.Expression] = None 1832 indexes = None 1833 no_schema_binding = None 1834 begin = None 1835 end = None 1836 clone = None 1837 1838 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1839 nonlocal properties 1840 if properties and temp_props: 1841 properties.expressions.extend(temp_props.expressions) 1842 elif temp_props: 1843 properties = temp_props 1844 1845 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1846 this = self._parse_user_defined_function(kind=create_token.token_type) 1847 1848 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1849 extend_props(self._parse_properties()) 1850 1851 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1852 
extend_props(self._parse_properties()) 1853 1854 if not expression: 1855 if self._match(TokenType.COMMAND): 1856 expression = self._parse_as_command(self._prev) 1857 else: 1858 begin = self._match(TokenType.BEGIN) 1859 return_ = self._match_text_seq("RETURN") 1860 1861 if self._match(TokenType.STRING, advance=False): 1862 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1863 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1864 expression = self._parse_string() 1865 extend_props(self._parse_properties()) 1866 else: 1867 expression = self._parse_user_defined_function_expression() 1868 1869 end = self._match_text_seq("END") 1870 1871 if return_: 1872 expression = self.expression(exp.Return, this=expression) 1873 elif create_token.token_type == TokenType.INDEX: 1874 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1875 if not self._match(TokenType.ON): 1876 index = self._parse_id_var() 1877 anonymous = False 1878 else: 1879 index = None 1880 anonymous = True 1881 1882 this = self._parse_index(index=index, anonymous=anonymous) 1883 elif create_token.token_type in self.DB_CREATABLES: 1884 table_parts = self._parse_table_parts( 1885 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1886 ) 1887 1888 # exp.Properties.Location.POST_NAME 1889 self._match(TokenType.COMMA) 1890 extend_props(self._parse_properties(before=True)) 1891 1892 this = self._parse_schema(this=table_parts) 1893 1894 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1895 extend_props(self._parse_properties()) 1896 1897 self._match(TokenType.ALIAS) 1898 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1899 # exp.Properties.Location.POST_ALIAS 1900 extend_props(self._parse_properties()) 1901 1902 if create_token.token_type == TokenType.SEQUENCE: 1903 expression = self._parse_types() 1904 extend_props(self._parse_properties()) 1905 else: 
1906 expression = self._parse_ddl_select() 1907 1908 if create_token.token_type == TokenType.TABLE: 1909 # exp.Properties.Location.POST_EXPRESSION 1910 extend_props(self._parse_properties()) 1911 1912 indexes = [] 1913 while True: 1914 index = self._parse_index() 1915 1916 # exp.Properties.Location.POST_INDEX 1917 extend_props(self._parse_properties()) 1918 if not index: 1919 break 1920 else: 1921 self._match(TokenType.COMMA) 1922 indexes.append(index) 1923 elif create_token.token_type == TokenType.VIEW: 1924 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1925 no_schema_binding = True 1926 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1927 extend_props(self._parse_properties()) 1928 1929 shallow = self._match_text_seq("SHALLOW") 1930 1931 if self._match_texts(self.CLONE_KEYWORDS): 1932 copy = self._prev.text.lower() == "copy" 1933 clone = self.expression( 1934 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1935 ) 1936 1937 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1938 return self._parse_as_command(start) 1939 1940 create_kind_text = create_token.text.upper() 1941 return self.expression( 1942 exp.Create, 1943 this=this, 1944 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1945 replace=replace, 1946 refresh=refresh, 1947 unique=unique, 1948 expression=expression, 1949 exists=exists, 1950 properties=properties, 1951 indexes=indexes, 1952 no_schema_binding=no_schema_binding, 1953 begin=begin, 1954 end=end, 1955 clone=clone, 1956 concurrently=concurrently, 1957 clustered=clustered, 1958 ) 1959 1960 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1961 seq = exp.SequenceProperties() 1962 1963 options = [] 1964 index = self._index 1965 1966 while self._curr: 1967 self._match(TokenType.COMMA) 1968 if self._match_text_seq("INCREMENT"): 1969 self._match_text_seq("BY") 1970 self._match_text_seq("=") 
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # If the cursor never moved, no sequence property was consumed
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that appears before the table name (Teradata-style)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may prefix the property name; each is
        # consumed greedily and forwarded to the property parser as a kwarg.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser for this property does not accept these modifiers
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/DDL property, returning None if nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fall back to a generic `key = value` property; retreat if there is no EQ
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse a STORED AS file-format clause (Hive-style INPUTFORMAT/OUTPUTFORMAT supported)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, converting unquoted identifiers to exp.Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `= <value>` (or `AS <value>`) into the given property expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties; `before` selects the Teradata pre-name form."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse a [NO] FALLBACK [PROTECTION] property."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        """Parse a SECURITY {DEFINER | INVOKER} property."""
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        """Parse a SETTINGS property as a list of assignments."""
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property vs. a function stability marker."""
        # Look two tokens back to see what preceded VOLATILE
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = {ON [(options)] | OFF}."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Comma-separated option list inside the parentheses
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse a DATA_DELETION = {ON | OFF} [(FILTER_COLUMN=..., RETENTION_PERIOD=...)] property."""
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly present
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse a DISTRIBUTED BY {HASH(cols) | RANDOM} [BUCKETS n | AUTO] property."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        """Parse a DUPLICATE KEY (cols) property."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH in a DDL statement; dispatches on the next token(s)."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        """Parse a single procedure option, e.g. T-SQL's EXECUTE AS."""
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse a MySQL-style DEFINER = user@host property."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse a WITH JOURNAL TABLE = <table> property."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse a [NO] LOG property."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Parse a JOURNAL property, forwarding any pre-matched modifiers."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse a CHECKSUM = {ON | OFF | DEFAULT} property."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY clause; `wrapped` means the expressions are parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (cols)] INTO n BUCKETS (Hive-style)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; retreats over COPY if GRANTS does not follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse a FREESPACE = n [PERCENT] property."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse a [NO | DEFAULT] MERGEBLOCKRATIO [= n [PERCENT]] property."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse a DATABLOCKSIZE [= n [BYTES | KBYTES | KILOBYTES]] property."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse a BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP(...)] property."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse a [NO] [CONCURRENT] ISOLATED LOADING property; retreats on no match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING {DATABASE | TABLE | VIEW | ROW} ... {FOR | IN} <lock type> property."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a table reference; ROW locking does not
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse a PARTITION BY clause into a list of expressions ([] if absent)."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords inside bound tuples, not columns
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF parent {DEFAULT | FOR VALUES <bound spec>}."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse a PARTITIONED BY property."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse a CONTAINS SQL routine characteristic."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse a MODIFIES SQL DATA routine characteristic."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse properties prefixed by NO: NO PRIMARY INDEX or NO SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse properties prefixed by ON: ON COMMIT {PRESERVE | DELETE} ROWS, or a generic ON target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse a READS SQL DATA routine characteristic."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse a DISTKEY(col) property."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse a LIKE <table> [{INCLUDING | EXCLUDING} <option> ...] clause."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse a [COMPOUND] SORTKEY(cols) property."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse a [DEFAULT] CHARACTER SET [=] <name> property."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse a REMOTE WITH CONNECTION <connection> property (BigQuery models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a type, TABLE [<schema>], or NULL ON NULL INPUT."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<...> (BigQuery-style generic syntax)
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # What looked like a style was actually the first part of a dotted table name
            style = None
            self._retreat(self._index - 2)

        # NOTE(review): `format` shadows the builtin; kept to mirror the exp.Describe arg name
        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse INSERT {FIRST | ALL} [WHEN ... THEN] INTO ... (Oracle multitable insert)."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One [WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...] branch
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (including INSERT OVERWRITE/DIRECTORY/multitable forms)."""
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. SQLite's INSERT OR REPLACE / OR IGNORE
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL [CONNECTION | QUERY] <id> statement."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres) or ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
duplicate=duplicate, 2783 expressions=expressions, 2784 action=action, 2785 conflict_keys=conflict_keys, 2786 constraint=constraint, 2787 ) 2788 2789 def _parse_returning(self) -> t.Optional[exp.Returning]: 2790 if not self._match(TokenType.RETURNING): 2791 return None 2792 return self.expression( 2793 exp.Returning, 2794 expressions=self._parse_csv(self._parse_expression), 2795 into=self._match(TokenType.INTO) and self._parse_table_part(), 2796 ) 2797 2798 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2799 if not self._match(TokenType.FORMAT): 2800 return None 2801 return self._parse_row_format() 2802 2803 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2804 index = self._index 2805 with_ = with_ or self._match_text_seq("WITH") 2806 2807 if not self._match(TokenType.SERDE_PROPERTIES): 2808 self._retreat(index) 2809 return None 2810 return self.expression( 2811 exp.SerdeProperties, 2812 **{ # type: ignore 2813 "expressions": self._parse_wrapped_properties(), 2814 "with": with_, 2815 }, 2816 ) 2817 2818 def _parse_row_format( 2819 self, match_row: bool = False 2820 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2821 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2822 return None 2823 2824 if self._match_text_seq("SERDE"): 2825 this = self._parse_string() 2826 2827 serde_properties = self._parse_serde_properties() 2828 2829 return self.expression( 2830 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2831 ) 2832 2833 self._match_text_seq("DELIMITED") 2834 2835 kwargs = {} 2836 2837 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2838 kwargs["fields"] = self._parse_string() 2839 if self._match_text_seq("ESCAPED", "BY"): 2840 kwargs["escaped"] = self._parse_string() 2841 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2842 kwargs["collection_items"] = self._parse_string() 
2843 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2844 kwargs["map_keys"] = self._parse_string() 2845 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2846 kwargs["lines"] = self._parse_string() 2847 if self._match_text_seq("NULL", "DEFINED", "AS"): 2848 kwargs["null"] = self._parse_string() 2849 2850 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2851 2852 def _parse_load(self) -> exp.LoadData | exp.Command: 2853 if self._match_text_seq("DATA"): 2854 local = self._match_text_seq("LOCAL") 2855 self._match_text_seq("INPATH") 2856 inpath = self._parse_string() 2857 overwrite = self._match(TokenType.OVERWRITE) 2858 self._match_pair(TokenType.INTO, TokenType.TABLE) 2859 2860 return self.expression( 2861 exp.LoadData, 2862 this=self._parse_table(schema=True), 2863 local=local, 2864 overwrite=overwrite, 2865 inpath=inpath, 2866 partition=self._parse_partition(), 2867 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2868 serde=self._match_text_seq("SERDE") and self._parse_string(), 2869 ) 2870 return self._parse_as_command(self._prev) 2871 2872 def _parse_delete(self) -> exp.Delete: 2873 # This handles MySQL's "Multiple-Table Syntax" 2874 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2875 tables = None 2876 if not self._match(TokenType.FROM, advance=False): 2877 tables = self._parse_csv(self._parse_table) or None 2878 2879 returning = self._parse_returning() 2880 2881 return self.expression( 2882 exp.Delete, 2883 tables=tables, 2884 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2885 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2886 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2887 where=self._parse_where(), 2888 returning=returning or self._parse_returning(), 2889 limit=self._parse_limit(), 2890 ) 2891 2892 def _parse_update(self) -> exp.Update: 2893 this = self._parse_table(joins=True, 
                                        alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                # RETURNING may appear before or after WHERE depending on dialect.
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # NOTE(review): only a single 'key' = 'value' pair is parsed here -- confirm
            # whether multiple OPTIONS entries should be supported.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<assignment>, ...)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one VALUES row: a parenthesized tuple, or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, projections, derived tables, VALUES, etc."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A dotted name right after SELECT means ALL/DISTINCT here would be a column ref.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM was not a function call -- put the keyword back.
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte> [, <cte> ...]."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS [NOT MATERIALIZED | MATERIALIZED] (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        # materialized is tri-state: True/False when explicitly specified, else None.
        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)], returning None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or
            self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in a Subquery node with optional pivots, alias and sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnest joins (e.g. `FROM t, t.arr`) into explicit UNNEST nodes."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # refs tracks aliases/names already introduced by FROM and previous joins.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/LIMIT, ...) to `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an embedded OFFSET; split it out into
                            # its own node and move any LIMIT BY expressions onto it.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

            if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
                this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume all remaining tokens and wrap their raw SQL text in a Hint node."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        # NOTE(review): thin wrapper -- presumably a dialect override hook; confirm.
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse a hint body, falling back to its raw text if it cannot be parsed."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string =
True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint carried in the comment attached to a HINT token."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` means FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item: [FINAL | RUNNING] <expression>."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) row-pattern-matching clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # The ROWS PER MATCH / AFTER MATCH SKIP options are kept as plain vars
        # since their fixed keyword forms carry no structure worth modeling.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is regex-like; capture its raw SQL text by scanning
            # tokens until the balancing right paren.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY table expressions."""
        cross_apply =
        self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # Tri-state: truthy = CROSS APPLY, False = OUTER APPLY, None = neither.
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse an unnest, function call or (dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse USING (<col>, ...), unwrapping plain columns to bare identifiers."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN (including comma joins, APPLY forms and ON/USING conditions)."""
        if self._match(TokenType.COMMA):
            # Implicit join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            # Nested joins before ON/USING, e.g. `JOIN a JOIN b ON ... ON ...`.
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of an index definition (USING, columns, INCLUDE, ...)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index`/`anonymous` mean the name was handled upstream."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints; None if absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, id, string or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or
            self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly dotted table reference: [catalog.][db.]table[.part ...]."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # Trailing `*` is folded into the last identifier (e.g. BigQuery wildcard tables).
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM item: lateral, unnest, VALUES, subquery or a plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
this.set("alias", alias) 3780 3781 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3782 return self.expression( 3783 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3784 ) 3785 3786 this.set("hints", self._parse_table_hints()) 3787 3788 if not this.args.get("pivots"): 3789 this.set("pivots", self._parse_pivots()) 3790 3791 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3792 this.set("sample", self._parse_table_sample()) 3793 3794 if joins: 3795 for join in self._parse_joins(): 3796 this.append("joins", join) 3797 3798 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3799 this.set("ordinality", True) 3800 this.set("alias", self._parse_table_alias()) 3801 3802 return this 3803 3804 def _parse_version(self) -> t.Optional[exp.Version]: 3805 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3806 this = "TIMESTAMP" 3807 elif self._match(TokenType.VERSION_SNAPSHOT): 3808 this = "VERSION" 3809 else: 3810 return None 3811 3812 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3813 kind = self._prev.text.upper() 3814 start = self._parse_bitwise() 3815 self._match_texts(("TO", "AND")) 3816 end = self._parse_bitwise() 3817 expression: t.Optional[exp.Expression] = self.expression( 3818 exp.Tuple, expressions=[start, end] 3819 ) 3820 elif self._match_text_seq("CONTAINED", "IN"): 3821 kind = "CONTAINED IN" 3822 expression = self.expression( 3823 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3824 ) 3825 elif self._match(TokenType.ALL): 3826 kind = "ALL" 3827 expression = None 3828 else: 3829 self._match_text_seq("AS", "OF") 3830 kind = "AS OF" 3831 expression = self._parse_type() 3832 3833 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3834 3835 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3836 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3837 index = self._index 3838 historical_data = None 3839 if 
self._match_texts(self.HISTORICAL_DATA_PREFIX): 3840 this = self._prev.text.upper() 3841 kind = ( 3842 self._match(TokenType.L_PAREN) 3843 and self._match_texts(self.HISTORICAL_DATA_KIND) 3844 and self._prev.text.upper() 3845 ) 3846 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3847 3848 if expression: 3849 self._match_r_paren() 3850 historical_data = self.expression( 3851 exp.HistoricalData, this=this, kind=kind, expression=expression 3852 ) 3853 else: 3854 self._retreat(index) 3855 3856 return historical_data 3857 3858 def _parse_changes(self) -> t.Optional[exp.Changes]: 3859 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3860 return None 3861 3862 information = self._parse_var(any_token=True) 3863 self._match_r_paren() 3864 3865 return self.expression( 3866 exp.Changes, 3867 information=information, 3868 at_before=self._parse_historical_data(), 3869 end=self._parse_historical_data(), 3870 ) 3871 3872 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3873 if not self._match(TokenType.UNNEST): 3874 return None 3875 3876 expressions = self._parse_wrapped_csv(self._parse_equality) 3877 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3878 3879 alias = self._parse_table_alias() if with_alias else None 3880 3881 if alias: 3882 if self.dialect.UNNEST_COLUMN_ONLY: 3883 if alias.args.get("columns"): 3884 self.raise_error("Unexpected extra column alias in unnest.") 3885 3886 alias.set("columns", [alias.this]) 3887 alias.set("this", None) 3888 3889 columns = alias.args.get("columns") or [] 3890 if offset and len(expressions) < len(columns): 3891 offset = columns.pop() 3892 3893 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3894 self._match(TokenType.ALIAS) 3895 offset = self._parse_id_var( 3896 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3897 ) or exp.to_identifier("offset") 3898 3899 return self.expression(exp.Unnest, expressions=expressions, alias=alias, 
offset=offset) 3900 3901 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3902 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3903 if not is_derived and not ( 3904 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3905 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3906 ): 3907 return None 3908 3909 expressions = self._parse_csv(self._parse_value) 3910 alias = self._parse_table_alias() 3911 3912 if is_derived: 3913 self._match_r_paren() 3914 3915 return self.expression( 3916 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3917 ) 3918 3919 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3920 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3921 as_modifier and self._match_text_seq("USING", "SAMPLE") 3922 ): 3923 return None 3924 3925 bucket_numerator = None 3926 bucket_denominator = None 3927 bucket_field = None 3928 percent = None 3929 size = None 3930 seed = None 3931 3932 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3933 matched_l_paren = self._match(TokenType.L_PAREN) 3934 3935 if self.TABLESAMPLE_CSV: 3936 num = None 3937 expressions = self._parse_csv(self._parse_primary) 3938 else: 3939 expressions = None 3940 num = ( 3941 self._parse_factor() 3942 if self._match(TokenType.NUMBER, advance=False) 3943 else self._parse_primary() or self._parse_placeholder() 3944 ) 3945 3946 if self._match_text_seq("BUCKET"): 3947 bucket_numerator = self._parse_number() 3948 self._match_text_seq("OUT", "OF") 3949 bucket_denominator = bucket_denominator = self._parse_number() 3950 self._match(TokenType.ON) 3951 bucket_field = self._parse_field() 3952 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3953 percent = num 3954 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3955 size = num 3956 else: 3957 percent = num 3958 3959 if matched_l_paren: 3960 self._match_r_paren() 3961 
        if self._match(TokenType.L_PAREN):
            # Parenthesized sampling method with optional seed: (METHOD[, seed])
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        # Collect consecutive PIVOT/UNPIVOT clauses until _parse_pivot returns None
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        # Lazily yield JOIN clauses until _parse_join returns None
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement: PIVOT <table> ON ... USING ... GROUP BY ..."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the IN (...) portion of a PIVOT's FOR clause."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                # A single-part column used as an alias collapses to its identifier
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value
= self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            # ANY [ORDER BY ...] form instead of an explicit value list
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause. Returns None (with the token
        position restored) if the clause does not fully materialize."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without a body; back out entirely
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        # Only attach an alias if another PIVOT/UNPIVOT does not follow immediately
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            # Synthesize the output column names produced by the pivot: one per
            # (field value, aggregation alias) combination
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default naming: the alias of each aggregation (dialects may override)
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse ClickHouse's PREWHERE clause; None if absent."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None if absent (unless skip_where_token)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL/DISTINCT, ROLLUP, CUBE, GROUPING SETS
        and TOTALS modifiers. Returns None if GROUP BY is absent."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            # Plain grouping expressions; CUBE/ROLLUP are handled below, so the
            # csv parser yields None for them without consuming the token
            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = 
self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # If at most the WITH token itself was consumed, nothing followed
            # it; give the token back and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No progress this iteration -> done
            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        # WITH ROLLUP / WITH CUBE take no expression list of their own
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # Either a parenthesized tuple of columns or a single column
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None if absent (unless skip_having_token)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style hierarchical query clauses:
        [START WITH cond] CONNECT BY [NOCYCLE] cond [START WITH cond]."""
        if skip_start_token:
            start = None
        elif 
self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a valid prefix operator inside CONNECT BY, so register
        # a temporary parser for it and remove it again afterwards
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also follow CONNECT BY
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        # Parse `name [AS expr]`, producing an Alias when AS is present
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse ORDER BY ... WITH FILL INTERPOLATE (...)
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY / ORDER SIBLINGS BY; returns `this` unchanged if neither is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        # Generic parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse a single ordering term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        # Consume an optional ASC so that DESC decides the direction; the
        # `or (asc and False)` arm is always falsy and only documents intent
        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When NULLS FIRST/LAST isn't spelled out, infer it from the dialect's
        # default null ordering relative to the sort direction
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ClickHouse WITH FILL [FROM ...] [TO ...] [STEP ...] [INTERPOLATE ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top` is True) and the FETCH clause.
        Returns `this` unchanged if neither applies."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize its expression: TOP (n)
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # MySQL-style LIMIT offset, count
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None
            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            # ANSI FETCH {FIRST|NEXT} n [PERCENT] {ROW|ROWS} [ONLY | WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse LIMIT n BY expr, ...
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each optionally with OF <tables> and NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT n
            wait: t.Optional[bool | exp.Expression] = None
            if 
self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold any trailing UNION/EXCEPT/INTERSECT clauses onto `this`,
        left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                # Fall back to the dialect's default; None means the dialect
                # requires an explicit DISTINCT/ALL for this operation
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY/LIMIT) from the right
                # operand up to the set operation itself
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # An expression with an optional alias
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Top of the expression precedence chain; also handles := style assignments."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in 
self.ASSIGNMENT: 4451 # This allows us to parse <non-identifier token> := <expr> 4452 this = exp.column( 4453 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4454 ) 4455 4456 while self._match_set(self.ASSIGNMENT): 4457 if isinstance(this, exp.Column) and len(this.parts) == 1: 4458 this = this.this 4459 4460 this = self.expression( 4461 self.ASSIGNMENT[self._prev.token_type], 4462 this=this, 4463 comments=self._prev_comments, 4464 expression=self._parse_assignment(), 4465 ) 4466 4467 return this 4468 4469 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4470 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4471 4472 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4473 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4474 4475 def _parse_equality(self) -> t.Optional[exp.Expression]: 4476 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4477 4478 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4479 return self._parse_tokens(self._parse_range, self.COMPARISON) 4480 4481 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4482 this = this or self._parse_bitwise() 4483 negate = self._match(TokenType.NOT) 4484 4485 if self._match_set(self.RANGE_PARSERS): 4486 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4487 if not expression: 4488 return this 4489 4490 this = expression 4491 elif self._match(TokenType.ISNULL): 4492 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4493 4494 # Postgres supports ISNULL and NOTNULL for conditions. 
4495 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4496 if self._match(TokenType.NOTNULL): 4497 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4498 this = self.expression(exp.Not, this=this) 4499 4500 if negate: 4501 this = self._negate_range(this) 4502 4503 if self._match(TokenType.IS): 4504 this = self._parse_is(this) 4505 4506 return this 4507 4508 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4509 if not this: 4510 return this 4511 4512 return self.expression(exp.Not, this=this) 4513 4514 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4515 index = self._index - 1 4516 negate = self._match(TokenType.NOT) 4517 4518 if self._match_text_seq("DISTINCT", "FROM"): 4519 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4520 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4521 4522 if self._match(TokenType.JSON): 4523 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4524 4525 if self._match_text_seq("WITH"): 4526 _with = True 4527 elif self._match_text_seq("WITHOUT"): 4528 _with = False 4529 else: 4530 _with = None 4531 4532 unique = self._match(TokenType.UNIQUE) 4533 self._match_text_seq("KEYS") 4534 expression: t.Optional[exp.Expression] = self.expression( 4535 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4536 ) 4537 else: 4538 expression = self._parse_primary() or self._parse_null() 4539 if not expression: 4540 self._retreat(index) 4541 return None 4542 4543 this = self.expression(exp.Is, this=this, expression=expression) 4544 return self.expression(exp.Not, this=this) if negate else this 4545 4546 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4547 unnest = self._parse_unnest(with_alias=False) 4548 if unnest: 4549 this = self.expression(exp.In, this=this, unnest=unnest) 4550 elif self._match_set((TokenType.L_PAREN, 
TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query inside the parens is an IN (SELECT ...) subquery
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        # BETWEEN low AND high
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional ESCAPE '<char>' suffix (e.g. after LIKE)
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing the value/unit and
        folding chained interval additions. Returns None (position restored)
        when no interval is present."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # e.g. `INTERVAL IS ...` - this was not an interval after all
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
4605 # each INTERVAL expression into this canonical form so it's easy to transpile 4606 if this and this.is_number: 4607 this = exp.Literal.string(this.to_py()) 4608 elif this and this.is_string: 4609 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4610 if len(parts) == 1: 4611 if unit: 4612 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4613 self._retreat(self._index - 1) 4614 4615 this = exp.Literal.string(parts[0][0]) 4616 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4617 4618 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4619 unit = self.expression( 4620 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4621 ) 4622 4623 interval = self.expression(exp.Interval, this=this, unit=unit) 4624 4625 index = self._index 4626 self._match(TokenType.PLUS) 4627 4628 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4629 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4630 return self.expression( 4631 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4632 ) 4633 4634 self._retreat(index) 4635 return interval 4636 4637 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4638 this = self._parse_term() 4639 4640 while True: 4641 if self._match_set(self.BITWISE): 4642 this = self.expression( 4643 self.BITWISE[self._prev.token_type], 4644 this=this, 4645 expression=self._parse_term(), 4646 ) 4647 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4648 this = self.expression( 4649 exp.DPipe, 4650 this=this, 4651 expression=self._parse_term(), 4652 safe=not self.dialect.STRICT_STRING_CONCAT, 4653 ) 4654 elif self._match(TokenType.DQMARK): 4655 this = self.expression( 4656 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4657 ) 4658 elif self._match_pair(TokenType.LT, TokenType.LT): 4659 this = self.expression( 4660 exp.BitwiseLeftShift, this=this, 
expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Fold additive-level operators (TERM) over factors, left-associatively."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Fold multiplicative-level operators (FACTOR) over unary/exponent
        expressions, and tag Div nodes with the dialect's division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator (e.g. DIV) with no right operand was actually an
            # identifier; give it back
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        # Prefix operators (e.g. -, ~, NOT) dispatch to UNARY_PARSERS
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse either an interval, a typed literal (e.g. DATE '2020-01-01',
        canonicalized to a Cast), or fall back to a column/identifier."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2020-01-01'; dialects may supply a
                # dedicated parser per type
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
4760 # 4761 # In these cases, we don't really want to return the converted type, but instead retreat 4762 # and try to parse a Column or Identifier in the section below. 4763 if data_type.expressions and index2 - index > 1: 4764 self._retreat(index2) 4765 return self._parse_column_ops(data_type) 4766 4767 self._retreat(index) 4768 4769 if fallback_to_identifier: 4770 return self._parse_id_var() 4771 4772 this = self._parse_column() 4773 return this and self._parse_column_ops(this) 4774 4775 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4776 this = self._parse_type() 4777 if not this: 4778 return None 4779 4780 if isinstance(this, exp.Column) and not this.table: 4781 this = exp.var(this.name.upper()) 4782 4783 return self.expression( 4784 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4785 ) 4786 4787 def _parse_types( 4788 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4789 ) -> t.Optional[exp.Expression]: 4790 index = self._index 4791 4792 this: t.Optional[exp.Expression] = None 4793 prefix = self._match_text_seq("SYSUDTLIB", ".") 4794 4795 if not self._match_set(self.TYPE_TOKENS): 4796 identifier = allow_identifiers and self._parse_id_var( 4797 any_token=False, tokens=(TokenType.VAR,) 4798 ) 4799 if isinstance(identifier, exp.Identifier): 4800 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4801 4802 if len(tokens) != 1: 4803 self.raise_error("Unexpected identifier", self._prev) 4804 4805 if tokens[0].token_type in self.TYPE_TOKENS: 4806 self._prev = tokens[0] 4807 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4808 type_name = identifier.name 4809 4810 while self._match(TokenType.DOT): 4811 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4812 4813 this = exp.DataType.build(type_name, udt=True) 4814 else: 4815 self._retreat(self._index - 1) 4816 return None 4817 else: 4818 return None 4819 4820 type_token = self._prev.token_type 4821 4822 if 
type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            # map[key_type => value_type]
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type parameters: struct fields, nested types, enum
            # values, aggregate signatures, or plain size parameters
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # ClickHouse Nullable(T) marks the inner type instead of nesting
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    # VECTOR(inner_type, dimension): first param is itself a type
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form might actually be a function call; checked below
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracketed nested types, e.g. ARRAY<INT> or STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Inline constructor values, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif 
type_token == TokenType.INTERVAL: 4936 unit = self._parse_var(upper=True) 4937 if unit: 4938 if self._match_text_seq("TO"): 4939 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4940 4941 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4942 else: 4943 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4944 4945 if maybe_func and check_func: 4946 index2 = self._index 4947 peek = self._parse_string() 4948 4949 if not peek: 4950 self._retreat(index) 4951 return None 4952 4953 self._retreat(index2) 4954 4955 if not this: 4956 if self._match_text_seq("UNSIGNED"): 4957 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4958 if not unsigned_type_token: 4959 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4960 4961 type_token = unsigned_type_token or type_token 4962 4963 this = exp.DataType( 4964 this=exp.DataType.Type[type_token.value], 4965 expressions=expressions, 4966 nested=nested, 4967 prefix=prefix, 4968 ) 4969 4970 # Empty arrays/structs are allowed 4971 if values is not None: 4972 cls = exp.Struct if is_struct else exp.Array 4973 this = exp.cast(cls(expressions=values), this, copy=False) 4974 4975 elif expressions: 4976 this.set("expressions", expressions) 4977 4978 # https://materialize.com/docs/sql/types/list/#type-name 4979 while self._match(TokenType.LIST): 4980 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4981 4982 index = self._index 4983 4984 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4985 matched_array = self._match(TokenType.ARRAY) 4986 4987 while self._curr: 4988 datatype_token = self._prev.token_type 4989 matched_l_bracket = self._match(TokenType.L_BRACKET) 4990 if not matched_l_bracket and not matched_array: 4991 break 4992 4993 matched_array = False 4994 values = self._parse_csv(self._parse_assignment) or None 4995 if ( 4996 values 4997 and not schema 4998 and ( 4999 
not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5000 ) 5001 ): 5002 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5003 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5004 self._retreat(index) 5005 break 5006 5007 this = exp.DataType( 5008 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5009 ) 5010 self._match(TokenType.R_BRACKET) 5011 5012 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5013 converter = self.TYPE_CONVERTERS.get(this.this) 5014 if converter: 5015 this = converter(t.cast(exp.DataType, this)) 5016 5017 return this 5018 5019 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5020 index = self._index 5021 5022 if ( 5023 self._curr 5024 and self._next 5025 and self._curr.token_type in self.TYPE_TOKENS 5026 and self._next.token_type in self.TYPE_TOKENS 5027 ): 5028 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5029 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5030 this = self._parse_id_var() 5031 else: 5032 this = ( 5033 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5034 or self._parse_id_var() 5035 ) 5036 5037 self._match(TokenType.COLON) 5038 5039 if ( 5040 type_required 5041 and not isinstance(this, exp.DataType) 5042 and not self._match_set(self.TYPE_TOKENS, advance=False) 5043 ): 5044 self._retreat(index) 5045 return self._parse_types() 5046 5047 return self._parse_column_def(this) 5048 5049 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5050 if not self._match_text_seq("AT", "TIME", "ZONE"): 5051 return this 5052 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5053 5054 def _parse_column(self) -> t.Optional[exp.Expression]: 5055 this = self._parse_column_reference() 5056 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5057 5058 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5059 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5060 5061 return column 5062 5063 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5064 this = self._parse_field() 5065 if ( 5066 not this 5067 and self._match(TokenType.VALUES, advance=False) 5068 and self.VALUES_FOLLOWED_BY_PAREN 5069 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5070 ): 5071 this = self._parse_id_var() 5072 5073 if isinstance(this, exp.Identifier): 5074 # We bubble up comments from the Identifier to the Column 5075 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5076 5077 return this 5078 5079 def _parse_colon_as_variant_extract( 5080 self, this: t.Optional[exp.Expression] 5081 ) -> t.Optional[exp.Expression]: 5082 casts = [] 5083 json_path = [] 5084 escape = None 5085 5086 while self._match(TokenType.COLON): 5087 start_index = self._index 5088 5089 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5090 path = self._parse_column_ops( 5091 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5092 ) 5093 5094 # The cast :: operator has a lower precedence than the extraction operator :, so 5095 # we rearrange the AST appropriately to avoid casting the JSON path 5096 while isinstance(path, exp.Cast): 5097 casts.append(path.to) 5098 path = path.this 5099 5100 if casts: 5101 dcolon_offset = next( 5102 i 5103 for i, t in enumerate(self._tokens[start_index:]) 5104 if t.token_type == TokenType.DCOLON 5105 ) 5106 end_token = self._tokens[start_index + dcolon_offset - 1] 5107 else: 5108 end_token = self._prev 5109 5110 if path: 5111 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5112 # it'll roundtrip to a string literal in GET_PATH 5113 if isinstance(path, exp.Identifier) and path.quoted: 5114 escape = True 5115 5116 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5117 5118 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5119 # Databricks transforms it back to the colon/dot notation 5120 if json_path: 5121 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5122 5123 if json_path_expr: 5124 json_path_expr.set("escape", escape) 5125 5126 this = self.expression( 5127 exp.JSONExtract, 5128 this=this, 5129 expression=json_path_expr, 5130 variant_extract=True, 5131 ) 5132 5133 while casts: 5134 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5135 5136 return this 5137 5138 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5139 return self._parse_types() 5140 5141 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5142 this = self._parse_bracket(this) 5143 5144 while self._match_set(self.COLUMN_OPERATORS): 5145 op_token = self._prev.token_type 5146 op = 
self.COLUMN_OPERATORS.get(op_token) 5147 5148 if op_token == TokenType.DCOLON: 5149 field = self._parse_dcolon() 5150 if not field: 5151 self.raise_error("Expected type") 5152 elif op and self._curr: 5153 field = self._parse_column_reference() or self._parse_bracket() 5154 else: 5155 field = self._parse_field(any_token=True, anonymous_func=True) 5156 5157 if isinstance(field, (exp.Func, exp.Window)) and this: 5158 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5159 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5160 this = exp.replace_tree( 5161 this, 5162 lambda n: ( 5163 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5164 if n.table 5165 else n.this 5166 ) 5167 if isinstance(n, exp.Column) 5168 else n, 5169 ) 5170 5171 if op: 5172 this = op(self, this, field) 5173 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5174 this = self.expression( 5175 exp.Column, 5176 comments=this.comments, 5177 this=field, 5178 table=this.this, 5179 db=this.args.get("table"), 5180 catalog=this.args.get("db"), 5181 ) 5182 elif isinstance(field, exp.Window): 5183 # Move the exp.Dot's to the window's function 5184 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5185 field.set("this", window_func) 5186 this = field 5187 else: 5188 this = self.expression(exp.Dot, this=this, expression=field) 5189 5190 if field and field.comments: 5191 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5192 5193 this = self._parse_bracket(this) 5194 5195 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5196 5197 def _parse_primary(self) -> t.Optional[exp.Expression]: 5198 if self._match_set(self.PRIMARY_PARSERS): 5199 token_type = self._prev.token_type 5200 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5201 5202 if token_type == TokenType.STRING: 5203 expressions = [primary] 5204 
while self._match(TokenType.STRING): 5205 expressions.append(exp.Literal.string(self._prev.text)) 5206 5207 if len(expressions) > 1: 5208 return self.expression(exp.Concat, expressions=expressions) 5209 5210 return primary 5211 5212 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5213 return exp.Literal.number(f"0.{self._prev.text}") 5214 5215 if self._match(TokenType.L_PAREN): 5216 comments = self._prev_comments 5217 query = self._parse_select() 5218 5219 if query: 5220 expressions = [query] 5221 else: 5222 expressions = self._parse_expressions() 5223 5224 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5225 5226 if not this and self._match(TokenType.R_PAREN, advance=False): 5227 this = self.expression(exp.Tuple) 5228 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5229 this = self._parse_subquery(this=this, parse_alias=False) 5230 elif isinstance(this, exp.Subquery): 5231 this = self._parse_subquery( 5232 this=self._parse_set_operations(this), parse_alias=False 5233 ) 5234 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5235 this = self.expression(exp.Tuple, expressions=expressions) 5236 else: 5237 this = self.expression(exp.Paren, this=this) 5238 5239 if this: 5240 this.add_comments(comments) 5241 5242 self._match_r_paren(expression=this) 5243 return this 5244 5245 return None 5246 5247 def _parse_field( 5248 self, 5249 any_token: bool = False, 5250 tokens: t.Optional[t.Collection[TokenType]] = None, 5251 anonymous_func: bool = False, 5252 ) -> t.Optional[exp.Expression]: 5253 if anonymous_func: 5254 field = ( 5255 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5256 or self._parse_primary() 5257 ) 5258 else: 5259 field = self._parse_primary() or self._parse_function( 5260 anonymous=anonymous_func, any_token=any_token 5261 ) 5262 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5263 5264 def _parse_function( 5265 self, 5266 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5267 
anonymous: bool = False, 5268 optional_parens: bool = True, 5269 any_token: bool = False, 5270 ) -> t.Optional[exp.Expression]: 5271 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5272 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5273 fn_syntax = False 5274 if ( 5275 self._match(TokenType.L_BRACE, advance=False) 5276 and self._next 5277 and self._next.text.upper() == "FN" 5278 ): 5279 self._advance(2) 5280 fn_syntax = True 5281 5282 func = self._parse_function_call( 5283 functions=functions, 5284 anonymous=anonymous, 5285 optional_parens=optional_parens, 5286 any_token=any_token, 5287 ) 5288 5289 if fn_syntax: 5290 self._match(TokenType.R_BRACE) 5291 5292 return func 5293 5294 def _parse_function_call( 5295 self, 5296 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5297 anonymous: bool = False, 5298 optional_parens: bool = True, 5299 any_token: bool = False, 5300 ) -> t.Optional[exp.Expression]: 5301 if not self._curr: 5302 return None 5303 5304 comments = self._curr.comments 5305 token_type = self._curr.token_type 5306 this = self._curr.text 5307 upper = this.upper() 5308 5309 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5310 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5311 self._advance() 5312 return self._parse_window(parser(self)) 5313 5314 if not self._next or self._next.token_type != TokenType.L_PAREN: 5315 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5316 self._advance() 5317 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5318 5319 return None 5320 5321 if any_token: 5322 if token_type in self.RESERVED_TOKENS: 5323 return None 5324 elif token_type not in self.FUNC_TOKENS: 5325 return None 5326 5327 self._advance(2) 5328 5329 parser = self.FUNCTION_PARSERS.get(upper) 5330 if parser and not anonymous: 5331 this = parser(self) 5332 else: 5333 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5334 5335 if 
subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5336 this = self.expression( 5337 subquery_predicate, comments=comments, this=self._parse_select() 5338 ) 5339 self._match_r_paren() 5340 return this 5341 5342 if functions is None: 5343 functions = self.FUNCTIONS 5344 5345 function = functions.get(upper) 5346 5347 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5348 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5349 5350 if alias: 5351 args = self._kv_to_prop_eq(args) 5352 5353 if function and not anonymous: 5354 if "dialect" in function.__code__.co_varnames: 5355 func = function(args, dialect=self.dialect) 5356 else: 5357 func = function(args) 5358 5359 func = self.validate_expression(func, args) 5360 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5361 func.meta["name"] = this 5362 5363 this = func 5364 else: 5365 if token_type == TokenType.IDENTIFIER: 5366 this = exp.Identifier(this=this, quoted=True) 5367 this = self.expression(exp.Anonymous, this=this, expressions=args) 5368 5369 if isinstance(this, exp.Expression): 5370 this.add_comments(comments) 5371 5372 self._match_r_paren(this) 5373 return self._parse_window(this) 5374 5375 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5376 return expression 5377 5378 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5379 transformed = [] 5380 5381 for index, e in enumerate(expressions): 5382 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5383 if isinstance(e, exp.Alias): 5384 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5385 5386 if not isinstance(e, exp.PropertyEQ): 5387 e = self.expression( 5388 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5389 ) 5390 5391 if isinstance(e.this, exp.Column): 5392 e.this.replace(e.this.this) 5393 else: 5394 e = self._to_prop_eq(e, index) 5395 5396 transformed.append(e) 5397 5398 return transformed 5399 
5400 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5401 return self._parse_statement() 5402 5403 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5404 return self._parse_column_def(self._parse_id_var()) 5405 5406 def _parse_user_defined_function( 5407 self, kind: t.Optional[TokenType] = None 5408 ) -> t.Optional[exp.Expression]: 5409 this = self._parse_id_var() 5410 5411 while self._match(TokenType.DOT): 5412 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5413 5414 if not self._match(TokenType.L_PAREN): 5415 return this 5416 5417 expressions = self._parse_csv(self._parse_function_parameter) 5418 self._match_r_paren() 5419 return self.expression( 5420 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5421 ) 5422 5423 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5424 literal = self._parse_primary() 5425 if literal: 5426 return self.expression(exp.Introducer, this=token.text, expression=literal) 5427 5428 return self.expression(exp.Identifier, this=token.text) 5429 5430 def _parse_session_parameter(self) -> exp.SessionParameter: 5431 kind = None 5432 this = self._parse_id_var() or self._parse_primary() 5433 5434 if this and self._match(TokenType.DOT): 5435 kind = this.name 5436 this = self._parse_var() or self._parse_primary() 5437 5438 return self.expression(exp.SessionParameter, this=this, kind=kind) 5439 5440 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5441 return self._parse_id_var() 5442 5443 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5444 index = self._index 5445 5446 if self._match(TokenType.L_PAREN): 5447 expressions = t.cast( 5448 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5449 ) 5450 5451 if not self._match(TokenType.R_PAREN): 5452 self._retreat(index) 5453 else: 5454 expressions = [self._parse_lambda_arg()] 5455 5456 if 
self._match_set(self.LAMBDAS): 5457 return self.LAMBDAS[self._prev.token_type](self, expressions) 5458 5459 self._retreat(index) 5460 5461 this: t.Optional[exp.Expression] 5462 5463 if self._match(TokenType.DISTINCT): 5464 this = self.expression( 5465 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5466 ) 5467 else: 5468 this = self._parse_select_or_expression(alias=alias) 5469 5470 return self._parse_limit( 5471 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5472 ) 5473 5474 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5475 index = self._index 5476 if not self._match(TokenType.L_PAREN): 5477 return this 5478 5479 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5480 # expr can be of both types 5481 if self._match_set(self.SELECT_START_TOKENS): 5482 self._retreat(index) 5483 return this 5484 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5485 self._match_r_paren() 5486 return self.expression(exp.Schema, this=this, expressions=args) 5487 5488 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5489 return self._parse_column_def(self._parse_field(any_token=True)) 5490 5491 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5492 # column defs are not really columns, they're identifiers 5493 if isinstance(this, exp.Column): 5494 this = this.this 5495 5496 kind = self._parse_types(schema=True) 5497 5498 if self._match_text_seq("FOR", "ORDINALITY"): 5499 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5500 5501 constraints: t.List[exp.Expression] = [] 5502 5503 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5504 ("ALIAS", "MATERIALIZED") 5505 ): 5506 persisted = self._prev.text.upper() == "MATERIALIZED" 5507 constraint_kind = exp.ComputedColumnConstraint( 5508 this=self._parse_assignment(), 5509 
persisted=persisted or self._match_text_seq("PERSISTED"), 5510 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5511 ) 5512 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5513 elif ( 5514 kind 5515 and self._match(TokenType.ALIAS, advance=False) 5516 and ( 5517 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5518 or (self._next and self._next.token_type == TokenType.L_PAREN) 5519 ) 5520 ): 5521 self._advance() 5522 constraints.append( 5523 self.expression( 5524 exp.ColumnConstraint, 5525 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5526 ) 5527 ) 5528 5529 while True: 5530 constraint = self._parse_column_constraint() 5531 if not constraint: 5532 break 5533 constraints.append(constraint) 5534 5535 if not kind and not constraints: 5536 return this 5537 5538 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5539 5540 def _parse_auto_increment( 5541 self, 5542 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5543 start = None 5544 increment = None 5545 5546 if self._match(TokenType.L_PAREN, advance=False): 5547 args = self._parse_wrapped_csv(self._parse_bitwise) 5548 start = seq_get(args, 0) 5549 increment = seq_get(args, 1) 5550 elif self._match_text_seq("START"): 5551 start = self._parse_bitwise() 5552 self._match_text_seq("INCREMENT") 5553 increment = self._parse_bitwise() 5554 5555 if start and increment: 5556 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5557 5558 return exp.AutoIncrementColumnConstraint() 5559 5560 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5561 if not self._match_text_seq("REFRESH"): 5562 self._retreat(self._index - 1) 5563 return None 5564 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5565 5566 def _parse_compress(self) -> exp.CompressColumnConstraint: 5567 if self._match(TokenType.L_PAREN, advance=False): 5568 return 
self.expression( 5569 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5570 ) 5571 5572 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5573 5574 def _parse_generated_as_identity( 5575 self, 5576 ) -> ( 5577 exp.GeneratedAsIdentityColumnConstraint 5578 | exp.ComputedColumnConstraint 5579 | exp.GeneratedAsRowColumnConstraint 5580 ): 5581 if self._match_text_seq("BY", "DEFAULT"): 5582 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5583 this = self.expression( 5584 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5585 ) 5586 else: 5587 self._match_text_seq("ALWAYS") 5588 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5589 5590 self._match(TokenType.ALIAS) 5591 5592 if self._match_text_seq("ROW"): 5593 start = self._match_text_seq("START") 5594 if not start: 5595 self._match(TokenType.END) 5596 hidden = self._match_text_seq("HIDDEN") 5597 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5598 5599 identity = self._match_text_seq("IDENTITY") 5600 5601 if self._match(TokenType.L_PAREN): 5602 if self._match(TokenType.START_WITH): 5603 this.set("start", self._parse_bitwise()) 5604 if self._match_text_seq("INCREMENT", "BY"): 5605 this.set("increment", self._parse_bitwise()) 5606 if self._match_text_seq("MINVALUE"): 5607 this.set("minvalue", self._parse_bitwise()) 5608 if self._match_text_seq("MAXVALUE"): 5609 this.set("maxvalue", self._parse_bitwise()) 5610 5611 if self._match_text_seq("CYCLE"): 5612 this.set("cycle", True) 5613 elif self._match_text_seq("NO", "CYCLE"): 5614 this.set("cycle", False) 5615 5616 if not identity: 5617 this.set("expression", self._parse_range()) 5618 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5619 args = self._parse_csv(self._parse_bitwise) 5620 this.set("start", seq_get(args, 0)) 5621 this.set("increment", seq_get(args, 1)) 5622 5623 
self._match_r_paren() 5624 5625 return this 5626 5627 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5628 self._match_text_seq("LENGTH") 5629 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5630 5631 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5632 if self._match_text_seq("NULL"): 5633 return self.expression(exp.NotNullColumnConstraint) 5634 if self._match_text_seq("CASESPECIFIC"): 5635 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5636 if self._match_text_seq("FOR", "REPLICATION"): 5637 return self.expression(exp.NotForReplicationColumnConstraint) 5638 5639 # Unconsume the `NOT` token 5640 self._retreat(self._index - 1) 5641 return None 5642 5643 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5644 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5645 5646 procedure_option_follows = ( 5647 self._match(TokenType.WITH, advance=False) 5648 and self._next 5649 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5650 ) 5651 5652 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5653 return self.expression( 5654 exp.ColumnConstraint, 5655 this=this, 5656 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5657 ) 5658 5659 return this 5660 5661 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5662 if not self._match(TokenType.CONSTRAINT): 5663 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5664 5665 return self.expression( 5666 exp.Constraint, 5667 this=self._parse_id_var(), 5668 expressions=self._parse_unnamed_constraints(), 5669 ) 5670 5671 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5672 constraints = [] 5673 while True: 5674 constraint = self._parse_unnamed_constraint() or self._parse_function() 5675 if not constraint: 5676 break 5677 constraints.append(constraint) 5678 5679 return constraints 5680 5681 def _parse_unnamed_constraint( 
5682 self, constraints: t.Optional[t.Collection[str]] = None 5683 ) -> t.Optional[exp.Expression]: 5684 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5685 constraints or self.CONSTRAINT_PARSERS 5686 ): 5687 return None 5688 5689 constraint = self._prev.text.upper() 5690 if constraint not in self.CONSTRAINT_PARSERS: 5691 self.raise_error(f"No parser found for schema constraint {constraint}.") 5692 5693 return self.CONSTRAINT_PARSERS[constraint](self) 5694 5695 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5696 return self._parse_id_var(any_token=False) 5697 5698 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5699 self._match_text_seq("KEY") 5700 return self.expression( 5701 exp.UniqueColumnConstraint, 5702 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5703 this=self._parse_schema(self._parse_unique_key()), 5704 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5705 on_conflict=self._parse_on_conflict(), 5706 ) 5707 5708 def _parse_key_constraint_options(self) -> t.List[str]: 5709 options = [] 5710 while True: 5711 if not self._curr: 5712 break 5713 5714 if self._match(TokenType.ON): 5715 action = None 5716 on = self._advance_any() and self._prev.text 5717 5718 if self._match_text_seq("NO", "ACTION"): 5719 action = "NO ACTION" 5720 elif self._match_text_seq("CASCADE"): 5721 action = "CASCADE" 5722 elif self._match_text_seq("RESTRICT"): 5723 action = "RESTRICT" 5724 elif self._match_pair(TokenType.SET, TokenType.NULL): 5725 action = "SET NULL" 5726 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5727 action = "SET DEFAULT" 5728 else: 5729 self.raise_error("Invalid key constraint") 5730 5731 options.append(f"ON {on} {action}") 5732 else: 5733 var = self._parse_var_from_options( 5734 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5735 ) 5736 if not var: 5737 break 5738 options.append(var.name) 5739 5740 return options 5741 5742 def _parse_references(self, 
match: bool = True) -> t.Optional[exp.Reference]: 5743 if match and not self._match(TokenType.REFERENCES): 5744 return None 5745 5746 expressions = None 5747 this = self._parse_table(schema=True) 5748 options = self._parse_key_constraint_options() 5749 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5750 5751 def _parse_foreign_key(self) -> exp.ForeignKey: 5752 expressions = self._parse_wrapped_id_vars() 5753 reference = self._parse_references() 5754 options = {} 5755 5756 while self._match(TokenType.ON): 5757 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5758 self.raise_error("Expected DELETE or UPDATE") 5759 5760 kind = self._prev.text.lower() 5761 5762 if self._match_text_seq("NO", "ACTION"): 5763 action = "NO ACTION" 5764 elif self._match(TokenType.SET): 5765 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5766 action = "SET " + self._prev.text.upper() 5767 else: 5768 self._advance() 5769 action = self._prev.text.upper() 5770 5771 options[kind] = action 5772 5773 return self.expression( 5774 exp.ForeignKey, 5775 expressions=expressions, 5776 reference=reference, 5777 **options, # type: ignore 5778 ) 5779 5780 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5781 return self._parse_field() 5782 5783 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5784 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5785 self._retreat(self._index - 1) 5786 return None 5787 5788 id_vars = self._parse_wrapped_id_vars() 5789 return self.expression( 5790 exp.PeriodForSystemTimeConstraint, 5791 this=seq_get(id_vars, 0), 5792 expression=seq_get(id_vars, 1), 5793 ) 5794 5795 def _parse_primary_key( 5796 self, wrapped_optional: bool = False, in_props: bool = False 5797 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5798 desc = ( 5799 self._match_set((TokenType.ASC, TokenType.DESC)) 5800 and self._prev.token_type == TokenType.DESC 5801 ) 5802 5803 if not 
in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # One entry of a bracket/brace construct: an assignment expression with an
        # optional explicit alias, optionally followed by a `:` slice (e.g. `k: v`).
        # NOTE(review): `is_map` is unused in this base implementation; presumably
        # dialect subclasses override this and consult it — confirm before removing.
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        # The VAR token is the ODBC type marker (a key of ODBC_DATETIME_LITERALS).
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses `[...]` (subscript / array literal) and `{...}` (struct literal or ODBC
        # datetime literal) constructs; recurses at the end so chained accesses like
        # a[0][1] are fully consumed.
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        # `{d'...'}` / `{ts'...'}` etc. are ODBC datetime escapes, not struct literals.
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            # Bare `[...]` with nothing preceding it is an array literal.
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            # e.g. ARRAY[...] / LIST[...] constructors registered by the dialect.
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Otherwise it's a subscript; normalize indexes to the dialect's offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Turns `this : expr` into an exp.Slice; otherwise returns `this` unchanged.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        # CASE [operand] WHEN ... THEN ... [WHEN ...]* [ELSE default] END
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # Recovery: `... ELSE interval END` can be mis-parsed so that END was
            # consumed as part of an Interval default; restore the column `interval`.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Handles both the function form IF(cond, true[, false]) and the keyword
        # form IF cond THEN true [ELSE false] END.
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # Some dialects treat a leading bare IF as a command statement.
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; the caller already
        # consumed the first keyword, so retreat one token if VALUE FOR is absent.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(part FROM expr); some dialects use a comma instead of FROM —
        # both branches build the same node, the separator is the only difference.
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        # GAP_FILL(TABLE <table>, <arg>, ...) — the table is the first argument.
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = \
exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        # CAST(expr AS type [FORMAT fmt]) / TRY_CAST(...). `strict` selects
        # exp.Cast vs exp.TryCast; `safe` is forwarded onto the resulting node.
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            # CAST(expr, 'type string') — Snowflake-style cast-to-string-type.
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            # A FORMAT on a temporal target is transpiled into STR_TO_DATE /
            # STR_TO_TIME with the format converted to the dialect's time mapping.
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # An identifier target is a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        # Parses STRING_AGG / LISTAGG / GROUP_CONCAT-style calls into exp.GroupConcat,
        # covering the Postgres, BigQuery and Trino argument layouts.
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type); both become casts.
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            # Two-argument form: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats NULL = NULL as a match, so compare with IS NULL.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may evaluate to NULL at runtime, so
                # also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] <key> <sep> [VALUE] <value>, as used inside JSON_OBJECT(...).
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in exp.FormatJson when followed by the FORMAT JSON keywords.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g.
        # JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        # JSON_OBJECT(...) / JSON_OBJECTAGG(...) with their optional clauses
        # (NULL handling, [WITH|WITHOUT] UNIQUE KEYS, RETURNING, ENCODING).
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<json column def>, ...) — the column list of JSON_TABLE.
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        # JSON_TABLE(expr [FORMAT JSON] [, path] [ERROR|NULL ON ERROR/EMPTY] COLUMNS (...))
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and \
self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH (col, ...) AGAINST ('expr' [modifier]).
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) clause: name, type, [path], [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # POSITION(needle IN haystack) or the comma form; `haystack_first`
        # flips the comma-form argument order for dialects that need it.
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # BigQuery ML.PREDICT(MODEL <model>, TABLE <table> [, <params struct>]).
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # e.g. BROADCAST(t1, t2) inside a hint comment.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                # `substring(s for n)` means start position 1.
                if len(args) == 1:
                    args.append(exp.Literal.number(1))
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str) puts the pattern first; swap so `this` is the string.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Snowflake-style HAVING MAX/MIN qualifier on an aggregate.
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        # Parses the trailing window machinery of a function call: FILTER (...),
        # WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...). With `alias=True`
        # it parses a named window definition (WINDOW w AS (...)) instead.
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = \
self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            # Hoist a nested IGNORE/RESPECT NULLS out of the aggregate's argument
            # so it wraps the aggregate itself.
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a named window.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS|RANGE [BETWEEN] <bound> [AND <bound>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING.
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # Parses `this [AS] alias` (or a parenthesized alias list). With
        # `explicit=True`, only an alias introduced by AS is accepted.
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = \
self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        # Dispatches string-like tokens to the registered STRING_PARSERS.
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Parses a VAR token (or, optionally, any non-reserved token / any token
        # in `tokens`) into an exp.Var, upper-casing its text if requested.
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consumes and returns the current token unless it is reserved.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        # Tries the registered placeholder parsers; rewinds the consumed token
        # if the matched parser ultimately produces nothing.
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        # e.g. EXCEPT (a, b) / REPLACE (expr AS name) following a star.
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        # Parses a `sep`-separated list, skipping items that parse to None.
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments before the separator attach to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold: operand (op operand)* using the token -> node map.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Runs `parse_method` inside (...); parens may be omitted when `optional`.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        # BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]
        this = None
        if \
self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens (e.g. "ISOLATION LEVEL READ COMMITTED"),
        # with multiple modes separated by commas.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))

            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        # COMMIT [AND [NO] CHAIN] / ROLLBACK [TO [SAVEPOINT] name]. Note that the
        # savepoint is only attached to ROLLBACK and the chain flag only to COMMIT.
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER col]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        # DROP [COLUMN] ... inside ALTER TABLE; defaults the kind to COLUMN.
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        # ALTER TABLE ... ADD: constraints, column lists, or ADD COLUMNS (<schema>).
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: ALTER COLUMN c [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        # Redshift ALTER DISTSTYLE ALL|EVEN|AUTO or KEY DISTKEY <column>.
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        # Redshift ALTER [COMPOUND] SORTKEY (cols) or SORTKEY AUTO|NONE.
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        # ALTER TABLE ... DROP: either partitions or columns.
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        # RENAME COLUMN [IF EXISTS] old TO new, or RENAME TO <table>.
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        # ALTER TABLE ... SET <option>; each branch fills one arg of exp.AlterSet.
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or \
self._match_text_seq("TAGS"): 6942 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6943 else: 6944 if self._match_text_seq("SERDE"): 6945 alter_set.set("serde", self._parse_field()) 6946 6947 alter_set.set("expressions", [self._parse_properties()]) 6948 6949 return alter_set 6950 6951 def _parse_alter(self) -> exp.Alter | exp.Command: 6952 start = self._prev 6953 6954 alter_token = self._match_set(self.ALTERABLES) and self._prev 6955 if not alter_token: 6956 return self._parse_as_command(start) 6957 6958 exists = self._parse_exists() 6959 only = self._match_text_seq("ONLY") 6960 this = self._parse_table(schema=True) 6961 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6962 6963 if self._next: 6964 self._advance() 6965 6966 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6967 if parser: 6968 actions = ensure_list(parser(self)) 6969 not_valid = self._match_text_seq("NOT", "VALID") 6970 options = self._parse_csv(self._parse_property) 6971 6972 if not self._curr and actions: 6973 return self.expression( 6974 exp.Alter, 6975 this=this, 6976 kind=alter_token.text.upper(), 6977 exists=exists, 6978 actions=actions, 6979 only=only, 6980 options=options, 6981 cluster=cluster, 6982 not_valid=not_valid, 6983 ) 6984 6985 return self._parse_as_command(start) 6986 6987 def _parse_merge(self) -> exp.Merge: 6988 self._match(TokenType.INTO) 6989 target = self._parse_table() 6990 6991 if target and self._match(TokenType.ALIAS, advance=False): 6992 target.set("alias", self._parse_table_alias()) 6993 6994 self._match(TokenType.USING) 6995 using = self._parse_table() 6996 6997 self._match(TokenType.ON) 6998 on = self._parse_assignment() 6999 7000 return self.expression( 7001 exp.Merge, 7002 this=target, 7003 using=using, 7004 on=on, 7005 expressions=self._parse_when_matched(), 7006 returning=self._parse_returning(), 7007 ) 7008 7009 def _parse_when_matched(self) -> t.List[exp.When]: 7010 whens = [] 7011 7012 while 
self._match(TokenType.WHEN): 7013 matched = not self._match(TokenType.NOT) 7014 self._match_text_seq("MATCHED") 7015 source = ( 7016 False 7017 if self._match_text_seq("BY", "TARGET") 7018 else self._match_text_seq("BY", "SOURCE") 7019 ) 7020 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7021 7022 self._match(TokenType.THEN) 7023 7024 if self._match(TokenType.INSERT): 7025 this = self._parse_star() 7026 if this: 7027 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7028 else: 7029 then = self.expression( 7030 exp.Insert, 7031 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7032 expression=self._match_text_seq("VALUES") and self._parse_value(), 7033 ) 7034 elif self._match(TokenType.UPDATE): 7035 expressions = self._parse_star() 7036 if expressions: 7037 then = self.expression(exp.Update, expressions=expressions) 7038 else: 7039 then = self.expression( 7040 exp.Update, 7041 expressions=self._match(TokenType.SET) 7042 and self._parse_csv(self._parse_equality), 7043 ) 7044 elif self._match(TokenType.DELETE): 7045 then = self.expression(exp.Var, this=self._prev.text) 7046 else: 7047 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7048 7049 whens.append( 7050 self.expression( 7051 exp.When, 7052 matched=matched, 7053 source=source, 7054 condition=condition, 7055 then=then, 7056 ) 7057 ) 7058 return whens 7059 7060 def _parse_show(self) -> t.Optional[exp.Expression]: 7061 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7062 if parser: 7063 return parser(self) 7064 return self._parse_as_command(self._prev) 7065 7066 def _parse_set_item_assignment( 7067 self, kind: t.Optional[str] = None 7068 ) -> t.Optional[exp.Expression]: 7069 index = self._index 7070 7071 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7072 return self._parse_set_transaction(global_=kind == "GLOBAL") 7073 7074 left = self._parse_primary() or self._parse_column() 7075 
assignment_delimiter = self._match_texts(("=", "TO")) 7076 7077 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7078 self._retreat(index) 7079 return None 7080 7081 right = self._parse_statement() or self._parse_id_var() 7082 if isinstance(right, (exp.Column, exp.Identifier)): 7083 right = exp.var(right.name) 7084 7085 this = self.expression(exp.EQ, this=left, expression=right) 7086 return self.expression(exp.SetItem, this=this, kind=kind) 7087 7088 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7089 self._match_text_seq("TRANSACTION") 7090 characteristics = self._parse_csv( 7091 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7092 ) 7093 return self.expression( 7094 exp.SetItem, 7095 expressions=characteristics, 7096 kind="TRANSACTION", 7097 **{"global": global_}, # type: ignore 7098 ) 7099 7100 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7101 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7102 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7103 7104 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7105 index = self._index 7106 set_ = self.expression( 7107 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7108 ) 7109 7110 if self._curr: 7111 self._retreat(index) 7112 return self._parse_as_command(self._prev) 7113 7114 return set_ 7115 7116 def _parse_var_from_options( 7117 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7118 ) -> t.Optional[exp.Var]: 7119 start = self._curr 7120 if not start: 7121 return None 7122 7123 option = start.text.upper() 7124 continuations = options.get(option) 7125 7126 index = self._index 7127 self._advance() 7128 for keywords in continuations or []: 7129 if isinstance(keywords, str): 7130 keywords = (keywords,) 7131 7132 if self._match_text_seq(*keywords): 7133 option = f"{option} {' '.join(keywords)}" 7134 break 
7135 else: 7136 if continuations or continuations is None: 7137 if raise_unmatched: 7138 self.raise_error(f"Unknown option {option}") 7139 7140 self._retreat(index) 7141 return None 7142 7143 return exp.var(option) 7144 7145 def _parse_as_command(self, start: Token) -> exp.Command: 7146 while self._curr: 7147 self._advance() 7148 text = self._find_sql(start, self._prev) 7149 size = len(start.text) 7150 self._warn_unsupported() 7151 return exp.Command(this=text[:size], expression=text[size:]) 7152 7153 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7154 settings = [] 7155 7156 self._match_l_paren() 7157 kind = self._parse_id_var() 7158 7159 if self._match(TokenType.L_PAREN): 7160 while True: 7161 key = self._parse_id_var() 7162 value = self._parse_primary() 7163 if not key and value is None: 7164 break 7165 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7166 self._match(TokenType.R_PAREN) 7167 7168 self._match_r_paren() 7169 7170 return self.expression( 7171 exp.DictProperty, 7172 this=this, 7173 kind=kind.this if kind else None, 7174 settings=settings, 7175 ) 7176 7177 def _parse_dict_range(self, this: str) -> exp.DictRange: 7178 self._match_l_paren() 7179 has_min = self._match_text_seq("MIN") 7180 if has_min: 7181 min = self._parse_var() or self._parse_primary() 7182 self._match_text_seq("MAX") 7183 max = self._parse_var() or self._parse_primary() 7184 else: 7185 max = self._parse_var() or self._parse_primary() 7186 min = exp.Literal.number(0) 7187 self._match_r_paren() 7188 return self.expression(exp.DictRange, this=this, min=min, max=max) 7189 7190 def _parse_comprehension( 7191 self, this: t.Optional[exp.Expression] 7192 ) -> t.Optional[exp.Comprehension]: 7193 index = self._index 7194 expression = self._parse_column() 7195 if not self._match(TokenType.IN): 7196 self._retreat(index - 1) 7197 return None 7198 iterator = self._parse_column() 7199 condition = self._parse_assignment() if self._match_text_seq("IF") else 
None 7200 return self.expression( 7201 exp.Comprehension, 7202 this=this, 7203 expression=expression, 7204 iterator=iterator, 7205 condition=condition, 7206 ) 7207 7208 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7209 if self._match(TokenType.HEREDOC_STRING): 7210 return self.expression(exp.Heredoc, this=self._prev.text) 7211 7212 if not self._match_text_seq("$"): 7213 return None 7214 7215 tags = ["$"] 7216 tag_text = None 7217 7218 if self._is_connected(): 7219 self._advance() 7220 tags.append(self._prev.text.upper()) 7221 else: 7222 self.raise_error("No closing $ found") 7223 7224 if tags[-1] != "$": 7225 if self._is_connected() and self._match_text_seq("$"): 7226 tag_text = tags[-1] 7227 tags.append("$") 7228 else: 7229 self.raise_error("No closing $ found") 7230 7231 heredoc_start = self._curr 7232 7233 while self._curr: 7234 if self._match_text_seq(*tags, advance=False): 7235 this = self._find_sql(heredoc_start, self._prev) 7236 self._advance(len(tags)) 7237 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7238 7239 self._advance() 7240 7241 self.raise_error(f"No closing {''.join(tags)} found") 7242 return None 7243 7244 def _find_parser( 7245 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7246 ) -> t.Optional[t.Callable]: 7247 if not self._curr: 7248 return None 7249 7250 index = self._index 7251 this = [] 7252 while True: 7253 # The current token might be multiple words 7254 curr = self._curr.text.upper() 7255 key = curr.split(" ") 7256 this.append(curr) 7257 7258 self._advance() 7259 result, trie = in_trie(trie, key) 7260 if result == TrieResult.FAILED: 7261 break 7262 7263 if result == TrieResult.EXISTS: 7264 subparser = parsers[" ".join(this)] 7265 return subparser 7266 7267 self._retreat(index) 7268 return None 7269 7270 def _match(self, token_type, advance=True, expression=None): 7271 if not self._curr: 7272 return None 7273 7274 if self._curr.token_type == token_type: 7275 if advance: 7276 self._advance() 7277 
self._add_comments(expression) 7278 return True 7279 7280 return None 7281 7282 def _match_set(self, types, advance=True): 7283 if not self._curr: 7284 return None 7285 7286 if self._curr.token_type in types: 7287 if advance: 7288 self._advance() 7289 return True 7290 7291 return None 7292 7293 def _match_pair(self, token_type_a, token_type_b, advance=True): 7294 if not self._curr or not self._next: 7295 return None 7296 7297 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7298 if advance: 7299 self._advance(2) 7300 return True 7301 7302 return None 7303 7304 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7305 if not self._match(TokenType.L_PAREN, expression=expression): 7306 self.raise_error("Expecting (") 7307 7308 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7309 if not self._match(TokenType.R_PAREN, expression=expression): 7310 self.raise_error("Expecting )") 7311 7312 def _match_texts(self, texts, advance=True): 7313 if ( 7314 self._curr 7315 and self._curr.token_type != TokenType.STRING 7316 and self._curr.text.upper() in texts 7317 ): 7318 if advance: 7319 self._advance() 7320 return True 7321 return None 7322 7323 def _match_text_seq(self, *texts, advance=True): 7324 index = self._index 7325 for text in texts: 7326 if ( 7327 self._curr 7328 and self._curr.token_type != TokenType.STRING 7329 and self._curr.text.upper() == text 7330 ): 7331 self._advance() 7332 else: 7333 self._retreat(index) 7334 return None 7335 7336 if not advance: 7337 self._retreat(index) 7338 7339 return True 7340 7341 def _replace_lambda( 7342 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7343 ) -> t.Optional[exp.Expression]: 7344 if not node: 7345 return node 7346 7347 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7348 7349 for column in node.find_all(exp.Column): 7350 typ = lambda_types.get(column.parts[0].name) 7351 if typ 
is not None: 7352 dot_or_id = column.to_dot() if column.table else column.this 7353 7354 if typ: 7355 dot_or_id = self.expression( 7356 exp.Cast, 7357 this=dot_or_id, 7358 to=typ, 7359 ) 7360 7361 parent = column.parent 7362 7363 while isinstance(parent, exp.Dot): 7364 if not isinstance(parent.parent, exp.Dot): 7365 parent.replace(dot_or_id) 7366 break 7367 parent = parent.parent 7368 else: 7369 if column is node: 7370 node = dot_or_id 7371 else: 7372 column.replace(dot_or_id) 7373 return node 7374 7375 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7376 start = self._prev 7377 7378 # Not to be confused with TRUNCATE(number, decimals) function call 7379 if self._match(TokenType.L_PAREN): 7380 self._retreat(self._index - 2) 7381 return self._parse_function() 7382 7383 # Clickhouse supports TRUNCATE DATABASE as well 7384 is_database = self._match(TokenType.DATABASE) 7385 7386 self._match(TokenType.TABLE) 7387 7388 exists = self._parse_exists(not_=False) 7389 7390 expressions = self._parse_csv( 7391 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7392 ) 7393 7394 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7395 7396 if self._match_text_seq("RESTART", "IDENTITY"): 7397 identity = "RESTART" 7398 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7399 identity = "CONTINUE" 7400 else: 7401 identity = None 7402 7403 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7404 option = self._prev.text 7405 else: 7406 option = None 7407 7408 partition = self._parse_partition() 7409 7410 # Fallback case 7411 if self._curr: 7412 return self._parse_as_command(start) 7413 7414 return self.expression( 7415 exp.TruncateTable, 7416 expressions=expressions, 7417 is_database=is_database, 7418 exists=exists, 7419 cluster=cluster, 7420 identity=identity, 7421 option=option, 7422 partition=partition, 7423 ) 7424 7425 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7426 
this = self._parse_ordered(self._parse_opclass) 7427 7428 if not self._match(TokenType.WITH): 7429 return this 7430 7431 op = self._parse_var(any_token=True) 7432 7433 return self.expression(exp.WithOperator, this=this, op=op) 7434 7435 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7436 self._match(TokenType.EQ) 7437 self._match(TokenType.L_PAREN) 7438 7439 opts: t.List[t.Optional[exp.Expression]] = [] 7440 while self._curr and not self._match(TokenType.R_PAREN): 7441 if self._match_text_seq("FORMAT_NAME", "="): 7442 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7443 # so we parse it separately to use _parse_field() 7444 prop = self.expression( 7445 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7446 ) 7447 opts.append(prop) 7448 else: 7449 opts.append(self._parse_property()) 7450 7451 self._match(TokenType.COMMA) 7452 7453 return opts 7454 7455 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7456 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7457 7458 options = [] 7459 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7460 option = self._parse_var(any_token=True) 7461 prev = self._prev.text.upper() 7462 7463 # Different dialects might separate options and values by white space, "=" and "AS" 7464 self._match(TokenType.EQ) 7465 self._match(TokenType.ALIAS) 7466 7467 param = self.expression(exp.CopyParameter, this=option) 7468 7469 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7470 TokenType.L_PAREN, advance=False 7471 ): 7472 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7473 param.set("expressions", self._parse_wrapped_options()) 7474 elif prev == "FILE_FORMAT": 7475 # T-SQL's external file format case 7476 param.set("expression", self._parse_field()) 7477 else: 7478 param.set("expression", self._parse_unquoted_field()) 7479 7480 options.append(param) 7481 self._match(sep) 7482 7483 return options 7484 7485 
    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse the credential clauses of a COPY statement (Snowflake/Redshift styles)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Hook point: dialects may override how a COPY file location is parsed
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY INTO/FROM/TO; unconsumed tokens force a raw-Command fallback."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM (load), False for COPY ... TO (unload)
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(<string> [, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse star projections: COLUMNS(...) unpacking or * EXCEPT/REPLACE/RENAME."""
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one privilege (possibly multi-word) with an optional column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse one grantee, optionally prefixed by ROLE or GROUP."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse GRANT <privileges> ON <securable> TO <principals>; Command fallback."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(<string> PLACING <string> FROM <pos> [FOR <len>])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for VAR_MAP(*), otherwise a VarMap from alternating key/value args."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up consecutive arguments as (key, value); odd arg counts still raise IndexError
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]
    keys = [key for key, _ in pairs]
    values = [value for _, value in pairs]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callable that builds an ``expr_type`` binary/range predicate.

    The callable parses the right-hand operand with ``_parse_bitwise`` and wraps
    the resulting node with ``_parse_escape``; ``reverse_args`` swaps operand order.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        return self._parse_escape(self.expression(expr_type, this=left, expression=right))

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build LOG(...) honoring the dialect's base-argument order; 1-arg LOG may mean LN."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        if dialect.LOG_BASE_FIRST:
            return exp.Log(this=base, expression=value)
        return exp.Log(this=value, expression=base)

    if dialect.parser_class.LOG_DEFAULTS_TO_LN:
        return exp.Ln(this=base)
    return exp.Log(this=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Create a builder for JSON-extract functions whose second argument is a JSON path."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Only JSONExtract carries trailing variadic arguments
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a modulo expression, parenthesizing binary operands.

    e.g. MOD(a + 1, 7) -> (a + 1) % 7 so precedence is preserved when re-rendered.
    """

    def _wrap(operand: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        return exp.Paren(this=operand) if isinstance(operand, exp.Binary) else operand

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array constructor, recording bracket notation where dialects distinguish it."""
    node = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        # Remember whether ARRAY[...] (brackets) vs ARRAY(...) (parens) was used
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; a two-argument call gets the dialect's default source zone."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 
TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 
TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.DECIMAL256, 356 TokenType.UDECIMAL, 357 TokenType.BIGDECIMAL, 358 TokenType.UUID, 359 TokenType.GEOGRAPHY, 360 TokenType.GEOMETRY, 361 TokenType.POINT, 362 TokenType.RING, 363 TokenType.LINESTRING, 364 TokenType.MULTILINESTRING, 365 TokenType.POLYGON, 366 TokenType.MULTIPOLYGON, 367 TokenType.HLLSKETCH, 368 TokenType.HSTORE, 369 TokenType.PSEUDO_TYPE, 370 TokenType.SUPER, 371 TokenType.SERIAL, 372 TokenType.SMALLSERIAL, 373 TokenType.BIGSERIAL, 374 TokenType.XML, 375 TokenType.YEAR, 376 TokenType.UNIQUEIDENTIFIER, 377 TokenType.USERDEFINED, 378 TokenType.MONEY, 379 TokenType.SMALLMONEY, 380 TokenType.ROWVERSION, 381 TokenType.IMAGE, 382 TokenType.VARIANT, 383 TokenType.VECTOR, 384 TokenType.OBJECT, 385 TokenType.OBJECT_IDENTIFIER, 386 TokenType.INET, 387 TokenType.IPADDRESS, 388 TokenType.IPPREFIX, 389 TokenType.IPV4, 390 TokenType.IPV6, 391 TokenType.UNKNOWN, 392 TokenType.NULL, 393 TokenType.NAME, 394 TokenType.TDIGEST, 395 *ENUM_TYPE_TOKENS, 396 *NESTED_TYPE_TOKENS, 397 *AGGREGATE_TYPE_TOKENS, 398 } 399 400 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 401 TokenType.BIGINT: TokenType.UBIGINT, 402 TokenType.INT: TokenType.UINT, 403 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 404 TokenType.SMALLINT: TokenType.USMALLINT, 405 TokenType.TINYINT: TokenType.UTINYINT, 406 TokenType.DECIMAL: TokenType.UDECIMAL, 407 } 408 409 SUBQUERY_PREDICATES = { 410 TokenType.ANY: exp.Any, 411 TokenType.ALL: exp.All, 412 TokenType.EXISTS: exp.Exists, 413 TokenType.SOME: exp.Any, 414 } 415 416 RESERVED_TOKENS = { 417 *Tokenizer.SINGLE_TOKENS.values(), 418 TokenType.SELECT, 419 } - {TokenType.IDENTIFIER} 420 421 DB_CREATABLES = { 422 TokenType.DATABASE, 423 TokenType.DICTIONARY, 424 TokenType.MODEL, 425 TokenType.SCHEMA, 426 TokenType.SEQUENCE, 427 
TokenType.STORAGE_INTEGRATION, 428 TokenType.TABLE, 429 TokenType.TAG, 430 TokenType.VIEW, 431 TokenType.WAREHOUSE, 432 TokenType.STREAMLIT, 433 TokenType.SINK, 434 TokenType.SOURCE, 435 } 436 437 CREATABLES = { 438 TokenType.COLUMN, 439 TokenType.CONSTRAINT, 440 TokenType.FOREIGN_KEY, 441 TokenType.FUNCTION, 442 TokenType.INDEX, 443 TokenType.PROCEDURE, 444 *DB_CREATABLES, 445 } 446 447 ALTERABLES = { 448 TokenType.INDEX, 449 TokenType.TABLE, 450 TokenType.VIEW, 451 } 452 453 # Tokens that can represent identifiers 454 ID_VAR_TOKENS = { 455 TokenType.ALL, 456 TokenType.ATTACH, 457 TokenType.VAR, 458 TokenType.ANTI, 459 TokenType.APPLY, 460 TokenType.ASC, 461 TokenType.ASOF, 462 TokenType.AUTO_INCREMENT, 463 TokenType.BEGIN, 464 TokenType.BPCHAR, 465 TokenType.CACHE, 466 TokenType.CASE, 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.COMMENT, 470 TokenType.COMMIT, 471 TokenType.CONSTRAINT, 472 TokenType.COPY, 473 TokenType.CUBE, 474 TokenType.DEFAULT, 475 TokenType.DELETE, 476 TokenType.DESC, 477 TokenType.DESCRIBE, 478 TokenType.DETACH, 479 TokenType.DICTIONARY, 480 TokenType.DIV, 481 TokenType.END, 482 TokenType.EXECUTE, 483 TokenType.ESCAPE, 484 TokenType.FALSE, 485 TokenType.FIRST, 486 TokenType.FILTER, 487 TokenType.FINAL, 488 TokenType.FORMAT, 489 TokenType.FULL, 490 TokenType.IDENTIFIER, 491 TokenType.IS, 492 TokenType.ISNULL, 493 TokenType.INTERVAL, 494 TokenType.KEEP, 495 TokenType.KILL, 496 TokenType.LEFT, 497 TokenType.LOAD, 498 TokenType.MERGE, 499 TokenType.NATURAL, 500 TokenType.NEXT, 501 TokenType.OFFSET, 502 TokenType.OPERATOR, 503 TokenType.ORDINALITY, 504 TokenType.OVERLAPS, 505 TokenType.OVERWRITE, 506 TokenType.PARTITION, 507 TokenType.PERCENT, 508 TokenType.PIVOT, 509 TokenType.PRAGMA, 510 TokenType.RANGE, 511 TokenType.RECURSIVE, 512 TokenType.REFERENCES, 513 TokenType.REFRESH, 514 TokenType.RENAME, 515 TokenType.REPLACE, 516 TokenType.RIGHT, 517 TokenType.ROLLUP, 518 TokenType.ROW, 519 TokenType.ROWS, 520 TokenType.SEMI, 521 
TokenType.SET, 522 TokenType.SETTINGS, 523 TokenType.SHOW, 524 TokenType.TEMPORARY, 525 TokenType.TOP, 526 TokenType.TRUE, 527 TokenType.TRUNCATE, 528 TokenType.UNIQUE, 529 TokenType.UNNEST, 530 TokenType.UNPIVOT, 531 TokenType.UPDATE, 532 TokenType.USE, 533 TokenType.VOLATILE, 534 TokenType.WINDOW, 535 *CREATABLES, 536 *SUBQUERY_PREDICATES, 537 *TYPE_TOKENS, 538 *NO_PAREN_FUNCTIONS, 539 } 540 ID_VAR_TOKENS.remove(TokenType.UNION) 541 542 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 543 544 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 545 TokenType.ANTI, 546 TokenType.APPLY, 547 TokenType.ASOF, 548 TokenType.FULL, 549 TokenType.LEFT, 550 TokenType.LOCK, 551 TokenType.NATURAL, 552 TokenType.OFFSET, 553 TokenType.RIGHT, 554 TokenType.SEMI, 555 TokenType.WINDOW, 556 } 557 558 ALIAS_TOKENS = ID_VAR_TOKENS 559 560 ARRAY_CONSTRUCTORS = { 561 "ARRAY": exp.Array, 562 "LIST": exp.List, 563 } 564 565 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 566 567 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 568 569 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 570 571 FUNC_TOKENS = { 572 TokenType.COLLATE, 573 TokenType.COMMAND, 574 TokenType.CURRENT_DATE, 575 TokenType.CURRENT_DATETIME, 576 TokenType.CURRENT_TIMESTAMP, 577 TokenType.CURRENT_TIME, 578 TokenType.CURRENT_USER, 579 TokenType.FILTER, 580 TokenType.FIRST, 581 TokenType.FORMAT, 582 TokenType.GLOB, 583 TokenType.IDENTIFIER, 584 TokenType.INDEX, 585 TokenType.ISNULL, 586 TokenType.ILIKE, 587 TokenType.INSERT, 588 TokenType.LIKE, 589 TokenType.MERGE, 590 TokenType.OFFSET, 591 TokenType.PRIMARY_KEY, 592 TokenType.RANGE, 593 TokenType.REPLACE, 594 TokenType.RLIKE, 595 TokenType.ROW, 596 TokenType.UNNEST, 597 TokenType.VAR, 598 TokenType.LEFT, 599 TokenType.RIGHT, 600 TokenType.SEQUENCE, 601 TokenType.DATE, 602 TokenType.DATETIME, 603 TokenType.TABLE, 604 TokenType.TIMESTAMP, 605 TokenType.TIMESTAMPTZ, 606 TokenType.TRUNCATE, 607 TokenType.WINDOW, 608 TokenType.XOR, 609 *TYPE_TOKENS, 610 
*SUBQUERY_PREDICATES, 611 } 612 613 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 614 TokenType.AND: exp.And, 615 } 616 617 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 618 TokenType.COLON_EQ: exp.PropertyEQ, 619 } 620 621 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 622 TokenType.OR: exp.Or, 623 } 624 625 EQUALITY = { 626 TokenType.EQ: exp.EQ, 627 TokenType.NEQ: exp.NEQ, 628 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 629 } 630 631 COMPARISON = { 632 TokenType.GT: exp.GT, 633 TokenType.GTE: exp.GTE, 634 TokenType.LT: exp.LT, 635 TokenType.LTE: exp.LTE, 636 } 637 638 BITWISE = { 639 TokenType.AMP: exp.BitwiseAnd, 640 TokenType.CARET: exp.BitwiseXor, 641 TokenType.PIPE: exp.BitwiseOr, 642 } 643 644 TERM = { 645 TokenType.DASH: exp.Sub, 646 TokenType.PLUS: exp.Add, 647 TokenType.MOD: exp.Mod, 648 TokenType.COLLATE: exp.Collate, 649 } 650 651 FACTOR = { 652 TokenType.DIV: exp.IntDiv, 653 TokenType.LR_ARROW: exp.Distance, 654 TokenType.SLASH: exp.Div, 655 TokenType.STAR: exp.Mul, 656 } 657 658 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 659 660 TIMES = { 661 TokenType.TIME, 662 TokenType.TIMETZ, 663 } 664 665 TIMESTAMPS = { 666 TokenType.TIMESTAMP, 667 TokenType.TIMESTAMPTZ, 668 TokenType.TIMESTAMPLTZ, 669 *TIMES, 670 } 671 672 SET_OPERATIONS = { 673 TokenType.UNION, 674 TokenType.INTERSECT, 675 TokenType.EXCEPT, 676 } 677 678 JOIN_METHODS = { 679 TokenType.ASOF, 680 TokenType.NATURAL, 681 TokenType.POSITIONAL, 682 } 683 684 JOIN_SIDES = { 685 TokenType.LEFT, 686 TokenType.RIGHT, 687 TokenType.FULL, 688 } 689 690 JOIN_KINDS = { 691 TokenType.ANTI, 692 TokenType.CROSS, 693 TokenType.INNER, 694 TokenType.OUTER, 695 TokenType.SEMI, 696 TokenType.STRAIGHT_JOIN, 697 } 698 699 JOIN_HINTS: t.Set[str] = set() 700 701 LAMBDAS = { 702 TokenType.ARROW: lambda self, expressions: self.expression( 703 exp.Lambda, 704 this=self._replace_lambda( 705 self._parse_assignment(), 706 expressions, 707 ), 708 expressions=expressions, 709 ), 710 
TokenType.FARROW: lambda self, expressions: self.expression( 711 exp.Kwarg, 712 this=exp.var(expressions[0].name), 713 expression=self._parse_assignment(), 714 ), 715 } 716 717 COLUMN_OPERATORS = { 718 TokenType.DOT: None, 719 TokenType.DCOLON: lambda self, this, to: self.expression( 720 exp.Cast if self.STRICT_CAST else exp.TryCast, 721 this=this, 722 to=to, 723 ), 724 TokenType.ARROW: lambda self, this, path: self.expression( 725 exp.JSONExtract, 726 this=this, 727 expression=self.dialect.to_json_path(path), 728 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 729 ), 730 TokenType.DARROW: lambda self, this, path: self.expression( 731 exp.JSONExtractScalar, 732 this=this, 733 expression=self.dialect.to_json_path(path), 734 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 735 ), 736 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 737 exp.JSONBExtract, 738 this=this, 739 expression=path, 740 ), 741 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 742 exp.JSONBExtractScalar, 743 this=this, 744 expression=path, 745 ), 746 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 747 exp.JSONBContains, 748 this=this, 749 expression=key, 750 ), 751 } 752 753 EXPRESSION_PARSERS = { 754 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 755 exp.Column: lambda self: self._parse_column(), 756 exp.Condition: lambda self: self._parse_assignment(), 757 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 758 exp.Expression: lambda self: self._parse_expression(), 759 exp.From: lambda self: self._parse_from(joins=True), 760 exp.Group: lambda self: self._parse_group(), 761 exp.Having: lambda self: self._parse_having(), 762 exp.Hint: lambda self: self._parse_hint_body(), 763 exp.Identifier: lambda self: self._parse_id_var(), 764 exp.Join: lambda self: self._parse_join(), 765 exp.Lambda: lambda self: self._parse_lambda(), 766 exp.Lateral: lambda self: self._parse_lateral(), 767 
exp.Limit: lambda self: self._parse_limit(), 768 exp.Offset: lambda self: self._parse_offset(), 769 exp.Order: lambda self: self._parse_order(), 770 exp.Ordered: lambda self: self._parse_ordered(), 771 exp.Properties: lambda self: self._parse_properties(), 772 exp.Qualify: lambda self: self._parse_qualify(), 773 exp.Returning: lambda self: self._parse_returning(), 774 exp.Select: lambda self: self._parse_select(), 775 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 776 exp.Table: lambda self: self._parse_table_parts(), 777 exp.TableAlias: lambda self: self._parse_table_alias(), 778 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 779 exp.Where: lambda self: self._parse_where(), 780 exp.Window: lambda self: self._parse_named_window(), 781 exp.With: lambda self: self._parse_with(), 782 "JOIN_TYPE": lambda self: self._parse_join_parts(), 783 } 784 785 STATEMENT_PARSERS = { 786 TokenType.ALTER: lambda self: self._parse_alter(), 787 TokenType.BEGIN: lambda self: self._parse_transaction(), 788 TokenType.CACHE: lambda self: self._parse_cache(), 789 TokenType.COMMENT: lambda self: self._parse_comment(), 790 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 791 TokenType.COPY: lambda self: self._parse_copy(), 792 TokenType.CREATE: lambda self: self._parse_create(), 793 TokenType.DELETE: lambda self: self._parse_delete(), 794 TokenType.DESC: lambda self: self._parse_describe(), 795 TokenType.DESCRIBE: lambda self: self._parse_describe(), 796 TokenType.DROP: lambda self: self._parse_drop(), 797 TokenType.GRANT: lambda self: self._parse_grant(), 798 TokenType.INSERT: lambda self: self._parse_insert(), 799 TokenType.KILL: lambda self: self._parse_kill(), 800 TokenType.LOAD: lambda self: self._parse_load(), 801 TokenType.MERGE: lambda self: self._parse_merge(), 802 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 803 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 804 
TokenType.REFRESH: lambda self: self._parse_refresh(), 805 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 806 TokenType.SET: lambda self: self._parse_set(), 807 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 808 TokenType.UNCACHE: lambda self: self._parse_uncache(), 809 TokenType.UPDATE: lambda self: self._parse_update(), 810 TokenType.USE: lambda self: self.expression( 811 exp.Use, 812 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 813 this=self._parse_table(schema=False), 814 ), 815 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 816 } 817 818 UNARY_PARSERS = { 819 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 820 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 821 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 822 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 823 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 824 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 825 } 826 827 STRING_PARSERS = { 828 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 829 exp.RawString, this=token.text 830 ), 831 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 832 exp.National, this=token.text 833 ), 834 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 835 TokenType.STRING: lambda self, token: self.expression( 836 exp.Literal, this=token.text, is_string=True 837 ), 838 TokenType.UNICODE_STRING: lambda self, token: self.expression( 839 exp.UnicodeString, 840 this=token.text, 841 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 842 ), 843 } 844 845 NUMERIC_PARSERS = { 846 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 847 TokenType.BYTE_STRING: lambda self, token: 
self.expression(exp.ByteString, this=token.text), 848 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 849 TokenType.NUMBER: lambda self, token: self.expression( 850 exp.Literal, this=token.text, is_string=False 851 ), 852 } 853 854 PRIMARY_PARSERS = { 855 **STRING_PARSERS, 856 **NUMERIC_PARSERS, 857 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 858 TokenType.NULL: lambda self, _: self.expression(exp.Null), 859 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 860 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 861 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 862 TokenType.STAR: lambda self, _: self._parse_star_ops(), 863 } 864 865 PLACEHOLDER_PARSERS = { 866 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 867 TokenType.PARAMETER: lambda self: self._parse_parameter(), 868 TokenType.COLON: lambda self: ( 869 self.expression(exp.Placeholder, this=self._prev.text) 870 if self._match_set(self.ID_VAR_TOKENS) 871 else None 872 ), 873 } 874 875 RANGE_PARSERS = { 876 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 877 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 878 TokenType.GLOB: binary_range_parser(exp.Glob), 879 TokenType.ILIKE: binary_range_parser(exp.ILike), 880 TokenType.IN: lambda self, this: self._parse_in(this), 881 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 882 TokenType.IS: lambda self, this: self._parse_is(this), 883 TokenType.LIKE: binary_range_parser(exp.Like), 884 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 885 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 886 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 887 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 888 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 889 } 890 891 PROPERTY_PARSERS: t.Dict[str, t.Callable] 
= { 892 "ALLOWED_VALUES": lambda self: self.expression( 893 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 894 ), 895 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 896 "AUTO": lambda self: self._parse_auto_property(), 897 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 898 "BACKUP": lambda self: self.expression( 899 exp.BackupProperty, this=self._parse_var(any_token=True) 900 ), 901 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 902 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 903 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 904 "CHECKSUM": lambda self: self._parse_checksum(), 905 "CLUSTER BY": lambda self: self._parse_cluster(), 906 "CLUSTERED": lambda self: self._parse_clustered_by(), 907 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 908 exp.CollateProperty, **kwargs 909 ), 910 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 911 "CONTAINS": lambda self: self._parse_contains_property(), 912 "COPY": lambda self: self._parse_copy_property(), 913 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 914 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 915 "DEFINER": lambda self: self._parse_definer(), 916 "DETERMINISTIC": lambda self: self.expression( 917 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 918 ), 919 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 920 "DUPLICATE": lambda self: self._parse_duplicate(), 921 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 922 "DISTKEY": lambda self: self._parse_distkey(), 923 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 924 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 925 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 926 
"EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 927 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 928 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 929 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 930 "FREESPACE": lambda self: self._parse_freespace(), 931 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 932 "HEAP": lambda self: self.expression(exp.HeapProperty), 933 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 934 "IMMUTABLE": lambda self: self.expression( 935 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 936 ), 937 "INHERITS": lambda self: self.expression( 938 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 939 ), 940 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 941 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 942 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 943 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 944 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 945 "LIKE": lambda self: self._parse_create_like(), 946 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 947 "LOCK": lambda self: self._parse_locking(), 948 "LOCKING": lambda self: self._parse_locking(), 949 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 950 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 951 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 952 "MODIFIES": lambda self: self._parse_modifies_property(), 953 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 954 "NO": lambda self: self._parse_no_property(), 955 "ON": lambda self: self._parse_on_property(), 956 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 957 "OUTPUT": lambda self: 
self.expression(exp.OutputModelProperty, this=self._parse_schema()), 958 "PARTITION": lambda self: self._parse_partitioned_of(), 959 "PARTITION BY": lambda self: self._parse_partitioned_by(), 960 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 961 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 962 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 963 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 964 "READS": lambda self: self._parse_reads_property(), 965 "REMOTE": lambda self: self._parse_remote_with_connection(), 966 "RETURNS": lambda self: self._parse_returns(), 967 "STRICT": lambda self: self.expression(exp.StrictProperty), 968 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 969 "ROW": lambda self: self._parse_row(), 970 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 971 "SAMPLE": lambda self: self.expression( 972 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 973 ), 974 "SECURE": lambda self: self.expression(exp.SecureProperty), 975 "SECURITY": lambda self: self._parse_security(), 976 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 977 "SETTINGS": lambda self: self._parse_settings_property(), 978 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 979 "SORTKEY": lambda self: self._parse_sortkey(), 980 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 981 "STABLE": lambda self: self.expression( 982 exp.StabilityProperty, this=exp.Literal.string("STABLE") 983 ), 984 "STORED": lambda self: self._parse_stored(), 985 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 986 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 987 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 988 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 989 "TO": lambda self: self._parse_to_table(), 990 "TRANSIENT": lambda 
self: self.expression(exp.TransientProperty), 991 "TRANSFORM": lambda self: self.expression( 992 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 993 ), 994 "TTL": lambda self: self._parse_ttl(), 995 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 996 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 997 "VOLATILE": lambda self: self._parse_volatile_property(), 998 "WITH": lambda self: self._parse_with_property(), 999 } 1000 1001 CONSTRAINT_PARSERS = { 1002 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1003 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1004 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1005 "CHARACTER SET": lambda self: self.expression( 1006 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1007 ), 1008 "CHECK": lambda self: self.expression( 1009 exp.CheckColumnConstraint, 1010 this=self._parse_wrapped(self._parse_assignment), 1011 enforced=self._match_text_seq("ENFORCED"), 1012 ), 1013 "COLLATE": lambda self: self.expression( 1014 exp.CollateColumnConstraint, 1015 this=self._parse_identifier() or self._parse_column(), 1016 ), 1017 "COMMENT": lambda self: self.expression( 1018 exp.CommentColumnConstraint, this=self._parse_string() 1019 ), 1020 "COMPRESS": lambda self: self._parse_compress(), 1021 "CLUSTERED": lambda self: self.expression( 1022 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1023 ), 1024 "NONCLUSTERED": lambda self: self.expression( 1025 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1026 ), 1027 "DEFAULT": lambda self: self.expression( 1028 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1029 ), 1030 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1031 "EPHEMERAL": lambda self: self.expression( 1032 exp.EphemeralColumnConstraint, 
this=self._parse_bitwise() 1033 ), 1034 "EXCLUDE": lambda self: self.expression( 1035 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1036 ), 1037 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1038 "FORMAT": lambda self: self.expression( 1039 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1040 ), 1041 "GENERATED": lambda self: self._parse_generated_as_identity(), 1042 "IDENTITY": lambda self: self._parse_auto_increment(), 1043 "INLINE": lambda self: self._parse_inline(), 1044 "LIKE": lambda self: self._parse_create_like(), 1045 "NOT": lambda self: self._parse_not_constraint(), 1046 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1047 "ON": lambda self: ( 1048 self._match(TokenType.UPDATE) 1049 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1050 ) 1051 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1052 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1053 "PERIOD": lambda self: self._parse_period_for_system_time(), 1054 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1055 "REFERENCES": lambda self: self._parse_references(match=False), 1056 "TITLE": lambda self: self.expression( 1057 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1058 ), 1059 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1060 "UNIQUE": lambda self: self._parse_unique(), 1061 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1062 "WATERMARK": lambda self: self.expression( 1063 exp.WatermarkColumnConstraint, 1064 this=self._match(TokenType.FOR) and self._parse_column(), 1065 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1066 ), 1067 "WITH": lambda self: self.expression( 1068 exp.Properties, expressions=self._parse_wrapped_properties() 1069 ), 1070 } 1071 1072 ALTER_PARSERS = { 1073 "ADD": lambda self: 
self._parse_alter_table_add(), 1074 "AS": lambda self: self._parse_select(), 1075 "ALTER": lambda self: self._parse_alter_table_alter(), 1076 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1077 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1078 "DROP": lambda self: self._parse_alter_table_drop(), 1079 "RENAME": lambda self: self._parse_alter_table_rename(), 1080 "SET": lambda self: self._parse_alter_table_set(), 1081 "SWAP": lambda self: self.expression( 1082 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1083 ), 1084 } 1085 1086 ALTER_ALTER_PARSERS = { 1087 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1088 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1089 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1090 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1091 } 1092 1093 SCHEMA_UNNAMED_CONSTRAINTS = { 1094 "CHECK", 1095 "EXCLUDE", 1096 "FOREIGN KEY", 1097 "LIKE", 1098 "PERIOD", 1099 "PRIMARY KEY", 1100 "UNIQUE", 1101 "WATERMARK", 1102 } 1103 1104 NO_PAREN_FUNCTION_PARSERS = { 1105 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1106 "CASE": lambda self: self._parse_case(), 1107 "CONNECT_BY_ROOT": lambda self: self.expression( 1108 exp.ConnectByRoot, this=self._parse_column() 1109 ), 1110 "IF": lambda self: self._parse_if(), 1111 "NEXT": lambda self: self._parse_next_value_for(), 1112 } 1113 1114 INVALID_FUNC_NAME_TOKENS = { 1115 TokenType.IDENTIFIER, 1116 TokenType.STRING, 1117 } 1118 1119 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1120 1121 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1122 1123 FUNCTION_PARSERS = { 1124 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1125 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1126 "DECODE": lambda self: self._parse_decode(), 1127 "EXTRACT": lambda self: self._parse_extract(), 1128 "GAP_FILL": lambda self: 
self._parse_gap_fill(), 1129 "JSON_OBJECT": lambda self: self._parse_json_object(), 1130 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1131 "JSON_TABLE": lambda self: self._parse_json_table(), 1132 "MATCH": lambda self: self._parse_match_against(), 1133 "NORMALIZE": lambda self: self._parse_normalize(), 1134 "OPENJSON": lambda self: self._parse_open_json(), 1135 "OVERLAY": lambda self: self._parse_overlay(), 1136 "POSITION": lambda self: self._parse_position(), 1137 "PREDICT": lambda self: self._parse_predict(), 1138 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1139 "STRING_AGG": lambda self: self._parse_string_agg(), 1140 "SUBSTRING": lambda self: self._parse_substring(), 1141 "TRIM": lambda self: self._parse_trim(), 1142 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1143 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1144 } 1145 1146 QUERY_MODIFIER_PARSERS = { 1147 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1148 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1149 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1150 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1151 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1152 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1153 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1154 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1155 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1156 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1157 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1158 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1159 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1160 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1161 TokenType.USING: lambda 
self: ("sample", self._parse_table_sample(as_modifier=True)), 1162 TokenType.CLUSTER_BY: lambda self: ( 1163 "cluster", 1164 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1165 ), 1166 TokenType.DISTRIBUTE_BY: lambda self: ( 1167 "distribute", 1168 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1169 ), 1170 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1171 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1172 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1173 } 1174 1175 SET_PARSERS = { 1176 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1177 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1178 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1179 "TRANSACTION": lambda self: self._parse_set_transaction(), 1180 } 1181 1182 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1183 1184 TYPE_LITERAL_PARSERS = { 1185 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1186 } 1187 1188 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1189 1190 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1191 1192 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1193 1194 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1195 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1196 "ISOLATION": ( 1197 ("LEVEL", "REPEATABLE", "READ"), 1198 ("LEVEL", "READ", "COMMITTED"), 1199 ("LEVEL", "READ", "UNCOMITTED"), 1200 ("LEVEL", "SERIALIZABLE"), 1201 ), 1202 "READ": ("WRITE", "ONLY"), 1203 } 1204 1205 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1206 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1207 ) 1208 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1209 1210 CREATE_SEQUENCE: OPTIONS_TYPE = { 1211 "SCALE": ("EXTEND", "NOEXTEND"), 1212 "SHARD": ("EXTEND", 
"NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    # Object kinds accepted after USE (e.g. USE WAREHOUSE x) — no sub-keywords
    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    # Trailing options that may follow a key constraint definition
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Tokens allowed as identifiers in a FETCH clause (ROW/ROWS/PERCENT are keywords there)
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC datetime escape prefixes, e.g. {d '2024-01-01'}
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g.
SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Fixed attribute set: parser instances are created often, and __slots__
    # avoids a per-instance __dict__ while catching typo'd attribute writes.
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Local import; the top-level Dialect import is TYPE_CHECKING-only,
        # which suggests this avoids an import cycle — confirm before moving.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so this instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If none of the candidate types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into one chunk per semicolon-separated
        # statement, then runs `parse_method` once per chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    # A semicolon carrying comments becomes its own chunk so
                    # the attached comments are not dropped.
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for
1558 kwargs: The arguments to set for the expression along with their respective values. 1559 1560 Returns: 1561 The target expression. 1562 """ 1563 instance = exp_class(**kwargs) 1564 instance.add_comments(comments) if comments else self._add_comments(instance) 1565 return self.validate_expression(instance) 1566 1567 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1568 if expression and self._prev_comments: 1569 expression.add_comments(self._prev_comments) 1570 self._prev_comments = None 1571 1572 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1573 """ 1574 Validates an Expression, making sure that all its mandatory arguments are set. 1575 1576 Args: 1577 expression: The expression to validate. 1578 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1579 1580 Returns: 1581 The validated expression. 1582 """ 1583 if self.error_level != ErrorLevel.IGNORE: 1584 for error_message in expression.error_messages(args): 1585 self.raise_error(error_message) 1586 1587 return expression 1588 1589 def _find_sql(self, start: Token, end: Token) -> str: 1590 return self.sql[start.start : end.end + 1] 1591 1592 def _is_connected(self) -> bool: 1593 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1594 1595 def _advance(self, times: int = 1) -> None: 1596 self._index += times 1597 self._curr = seq_get(self._tokens, self._index) 1598 self._next = seq_get(self._tokens, self._index + 1) 1599 1600 if self._index > 0: 1601 self._prev = self._tokens[self._index - 1] 1602 self._prev_comments = self._prev.comments 1603 else: 1604 self._prev = None 1605 self._prev_comments = None 1606 1607 def _retreat(self, index: int) -> None: 1608 if index != self._index: 1609 self._advance(index - self._index) 1610 1611 def _warn_unsupported(self) -> None: 1612 if len(self._tokens) <= 1: 1613 return 1614 1615 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the remaining statement text in an opaque Command node.
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any parse failure raises and can be caught here.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT ON <kind> <name> IS <string>; falls back to a raw
        # Command when the target kind isn't a recognized creatable.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this
= self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: dispatches on STATEMENT_PARSERS, then dialect
        commands, then falls back to a bare expression/SELECT."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP <kind> [IF EXISTS] <name> [...modifiers]; falls back to a
        # raw Command when <kind> isn't a recognized creatable.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched
a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into
            # a single exp.Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Leftover tokens that aren't clause delimiters: bail out to Command.
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Parses CREATE SEQUENCE options; returns None if nothing was consumed.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # Cursor didn't move => nothing matched => no sequence properties here.
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parses a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Tries registered property parsers first, then special forms, and
        # finally a generic `key = value` assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED AS <format> / STORED AS INPUTFORMAT ... OUTPUTFORMAT ...
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers in property values are treated as plain vars.
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses `[= | AS] <value>` and wraps it in `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Greedily collects properties until one fails to parse; returns None
        # when nothing was collected.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is the Teradata-style table
        # property; elsewhere it is a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        # NOTE(review): if no unit token follows, `unit` is None and the rendered
        # var contains the literal text "None" — confirm callers always supply a unit.
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse TSQL's ``SYSTEM_VERSIONING = {ON [(<options>)] | OFF}``."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Optional option list: HISTORY_TABLE, DATA_CONSISTENCY_CHECK,
            # HISTORY_RETENTION_PERIOD, separated by commas.
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse ``DATA_DELETION = {ON | OFF} [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)]``."""
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly given; a bare property defaults to ON.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse ``DISTRIBUTED BY {HASH (cols) | RANDOM} [BUCKETS {n | AUTO}] [ORDER BY ...]``."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        """Parse ``DUPLICATE KEY (cols)``."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH in a property list; dispatches by lookahead."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        """Parse a single procedure option, e.g. TSQL's ``EXECUTE AS ...``."""
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse MySQL's ``DEFINER = user@host`` clause; None if user or host is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse ``[WITH] JOURNAL TABLE = <table>``."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a ``[NO] LOG`` property (keyword already consumed by the dispatcher)."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property from flags supplied by the dispatcher."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse ``CHECKSUM = {ON | OFF | DEFAULT}``; `on` stays None when unspecified."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY list, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive's ``CLUSTERED BY (cols) [SORTED BY (...)] INTO n BUCKETS``."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse ``COPY GRANTS``; backtrack one token if GRANTS does not follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse ``FREESPACE = n [PERCENT]``."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse ``MERGEBLOCKRATIO [= n [PERCENT]]``; flags come from the dispatcher."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse ``DATABLOCKSIZE = n [BYTES|KBYTES|KILOBYTES]``; flags from the dispatcher."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse ``BLOCKCOMPRESSION = {ALWAYS|MANUAL|NEVER|DEFAULT} [AUTOTEMP (...)]``."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse ``[NO] [CONCURRENT] ISOLATED LOADING [<target>]``; backtracks on mismatch."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata's LOCKING clause: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a table reference.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse an optional PARTITION BY list; empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords inside FROM/TO bounds, not expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse ``PARTITION OF <table> {DEFAULT | FOR VALUES <bound>}``."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse ``PARTITIONED BY <schema or expression>``."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the tail of ``WITH [NO] DATA [AND [NO] STATISTICS]``."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse ``CONTAINS SQL`` (CONTAINS already consumed)."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse ``MODIFIES SQL DATA`` (MODIFIES already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NO-prefixed property: PRIMARY INDEX or SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse the tail of an ON-prefixed property: COMMIT dispositions or a target schema."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse ``READS SQL DATA`` (READS already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse Redshift's ``DISTKEY (col)``."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse ``LIKE <table> [{INCLUDING | EXCLUDING} <option>]...``."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse ``[COMPOUND] SORTKEY (cols)``."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse ``CHARACTER SET [=] <name>``."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse BigQuery's ``REMOTE WITH CONNECTION <connection>``."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE schema, NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement, including optional kind, style, FORMAT and PARTITION."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot means what looked like a style was really part of a dotted
            # table name, so undo both matches.
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse Oracle-style multitable INSERT {FIRST | ALL} ... with WHEN/ELSE branches."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression =
self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            # Every branch must introduce an INTO target; otherwise we are done.
            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (incl. DIRECTORY targets and multitable inserts)."""
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT FIRST/ALL starts an Oracle multitable insert.
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse ``KILL [CONNECTION | QUERY] <id>``."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ``ON CONFLICT ...`` / ``ON DUPLICATE KEY ...``; None when absent."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause with an optional INTO target; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the tail of ``ROW FORMAT ...`` (ROW already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse ``[WITH] SERDEPROPERTIES (...)``; backtracks when the keyword is absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive's ``ROW FORMAT {SERDE ... | DELIMITED ...}`` clause."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's ``LOAD DATA [LOCAL] INPATH ...``; other LOAD forms become a Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse Spark's ``UNCACHE TABLE [IF EXISTS] <table>``."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse Spark's ``CACHE [LAZY] TABLE <table> [OPTIONS (...)] [AS <select>]``."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse ``PARTITION (assignments)``; None when absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse a single VALUES row, parenthesized or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, SELECT proper, parenthesized/nested
        forms, VALUES, leading FROM, SUMMARIZE, DESCRIBE and STREAM variants.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs; None when absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: alias, optional [NOT] MATERIALIZED, and the wrapped body."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse ``[AS] alias [(columns)]``; None when neither alias nor columns parse."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, parsing pivots, optional alias and table sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None,
table=[table.args["alias"].this], copy=False) 3203 3204 table.replace(unnest) 3205 3206 refs.add(normalized_table.alias_or_name) 3207 3208 return this 3209 3210 def _parse_query_modifiers( 3211 self, this: t.Optional[exp.Expression] 3212 ) -> t.Optional[exp.Expression]: 3213 if isinstance(this, (exp.Query, exp.Table)): 3214 for join in self._parse_joins(): 3215 this.append("joins", join) 3216 for lateral in iter(self._parse_lateral, None): 3217 this.append("laterals", lateral) 3218 3219 while True: 3220 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3221 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3222 key, expression = parser(self) 3223 3224 if expression: 3225 this.set(key, expression) 3226 if key == "limit": 3227 offset = expression.args.pop("offset", None) 3228 3229 if offset: 3230 offset = exp.Offset(expression=offset) 3231 this.set("offset", offset) 3232 3233 limit_by_expressions = expression.expressions 3234 expression.set("expressions", None) 3235 offset.set("expressions", limit_by_expressions) 3236 continue 3237 break 3238 3239 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3240 this = self._implicit_unnests_to_explicit(this) 3241 3242 return this 3243 3244 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3245 start = self._curr 3246 while self._curr: 3247 self._advance() 3248 3249 end = self._tokens[self._index - 1] 3250 return exp.Hint(expressions=[self._find_sql(start, end)]) 3251 3252 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3253 return self._parse_function_call() 3254 3255 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3256 start_index = self._index 3257 should_fallback_to_string = False 3258 3259 hints = [] 3260 try: 3261 for hint in iter( 3262 lambda: self._parse_csv( 3263 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3264 ), 3265 [], 3266 ): 3267 hints.extend(hint) 3268 except ParseError: 3269 should_fallback_to_string = 
True 3270 3271 if should_fallback_to_string or self._curr: 3272 self._retreat(start_index) 3273 return self._parse_hint_fallback_to_string() 3274 3275 return self.expression(exp.Hint, expressions=hints) 3276 3277 def _parse_hint(self) -> t.Optional[exp.Hint]: 3278 if self._match(TokenType.HINT) and self._prev_comments: 3279 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3280 3281 return None 3282 3283 def _parse_into(self) -> t.Optional[exp.Into]: 3284 if not self._match(TokenType.INTO): 3285 return None 3286 3287 temp = self._match(TokenType.TEMPORARY) 3288 unlogged = self._match_text_seq("UNLOGGED") 3289 self._match(TokenType.TABLE) 3290 3291 return self.expression( 3292 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3293 ) 3294 3295 def _parse_from( 3296 self, joins: bool = False, skip_from_token: bool = False 3297 ) -> t.Optional[exp.From]: 3298 if not skip_from_token and not self._match(TokenType.FROM): 3299 return None 3300 3301 return self.expression( 3302 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3303 ) 3304 3305 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3306 return self.expression( 3307 exp.MatchRecognizeMeasure, 3308 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3309 this=self._parse_expression(), 3310 ) 3311 3312 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3313 if not self._match(TokenType.MATCH_RECOGNIZE): 3314 return None 3315 3316 self._match_l_paren() 3317 3318 partition = self._parse_partition_by() 3319 order = self._parse_order() 3320 3321 measures = ( 3322 self._parse_csv(self._parse_match_recognize_measure) 3323 if self._match_text_seq("MEASURES") 3324 else None 3325 ) 3326 3327 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3328 rows = exp.var("ONE ROW PER MATCH") 3329 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3330 text = "ALL ROWS 
PER MATCH" 3331 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3332 text += " SHOW EMPTY MATCHES" 3333 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3334 text += " OMIT EMPTY MATCHES" 3335 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3336 text += " WITH UNMATCHED ROWS" 3337 rows = exp.var(text) 3338 else: 3339 rows = None 3340 3341 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3342 text = "AFTER MATCH SKIP" 3343 if self._match_text_seq("PAST", "LAST", "ROW"): 3344 text += " PAST LAST ROW" 3345 elif self._match_text_seq("TO", "NEXT", "ROW"): 3346 text += " TO NEXT ROW" 3347 elif self._match_text_seq("TO", "FIRST"): 3348 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3349 elif self._match_text_seq("TO", "LAST"): 3350 text += f" TO LAST {self._advance_any().text}" # type: ignore 3351 after = exp.var(text) 3352 else: 3353 after = None 3354 3355 if self._match_text_seq("PATTERN"): 3356 self._match_l_paren() 3357 3358 if not self._curr: 3359 self.raise_error("Expecting )", self._curr) 3360 3361 paren = 1 3362 start = self._curr 3363 3364 while self._curr and paren > 0: 3365 if self._curr.token_type == TokenType.L_PAREN: 3366 paren += 1 3367 if self._curr.token_type == TokenType.R_PAREN: 3368 paren -= 1 3369 3370 end = self._prev 3371 self._advance() 3372 3373 if paren > 0: 3374 self.raise_error("Expecting )", self._curr) 3375 3376 pattern = exp.var(self._find_sql(start, end)) 3377 else: 3378 pattern = None 3379 3380 define = ( 3381 self._parse_csv(self._parse_name_as_expression) 3382 if self._match_text_seq("DEFINE") 3383 else None 3384 ) 3385 3386 self._match_r_paren() 3387 3388 return self.expression( 3389 exp.MatchRecognize, 3390 partition_by=partition, 3391 order=order, 3392 measures=measures, 3393 rows=rows, 3394 after=after, 3395 pattern=pattern, 3396 define=define, 3397 alias=self._parse_table_alias(), 3398 ) 3399 3400 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3401 cross_apply = 
self._match_pair(TokenType.CROSS, TokenType.APPLY) 3402 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3403 cross_apply = False 3404 3405 if cross_apply is not None: 3406 this = self._parse_select(table=True) 3407 view = None 3408 outer = None 3409 elif self._match(TokenType.LATERAL): 3410 this = self._parse_select(table=True) 3411 view = self._match(TokenType.VIEW) 3412 outer = self._match(TokenType.OUTER) 3413 else: 3414 return None 3415 3416 if not this: 3417 this = ( 3418 self._parse_unnest() 3419 or self._parse_function() 3420 or self._parse_id_var(any_token=False) 3421 ) 3422 3423 while self._match(TokenType.DOT): 3424 this = exp.Dot( 3425 this=this, 3426 expression=self._parse_function() or self._parse_id_var(any_token=False), 3427 ) 3428 3429 if view: 3430 table = self._parse_id_var(any_token=False) 3431 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3432 table_alias: t.Optional[exp.TableAlias] = self.expression( 3433 exp.TableAlias, this=table, columns=columns 3434 ) 3435 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3436 # We move the alias from the lateral's child node to the lateral itself 3437 table_alias = this.args["alias"].pop() 3438 else: 3439 table_alias = self._parse_table_alias() 3440 3441 return self.expression( 3442 exp.Lateral, 3443 this=this, 3444 view=view, 3445 outer=outer, 3446 alias=table_alias, 3447 cross_apply=cross_apply, 3448 ) 3449 3450 def _parse_join_parts( 3451 self, 3452 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3453 return ( 3454 self._match_set(self.JOIN_METHODS) and self._prev, 3455 self._match_set(self.JOIN_SIDES) and self._prev, 3456 self._match_set(self.JOIN_KINDS) and self._prev, 3457 ) 3458 3459 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3460 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3461 this = self._parse_column() 3462 if isinstance(this, exp.Column): 3463 return 
this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        # A bare comma between tables is treated as an (implicit) join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        # STRAIGHT_JOIN acts as the JOIN keyword itself, so no separate JOIN
        # token is required in that case.
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # The method/side/kind tokens we consumed were not part of a join
            # after all; rewind and forget them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            # ARRAY JOIN may take a comma-separated list of tables.
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # A join without ON/USING may still be followed by nested joins
            # whose ON/USING belongs to this one; remember where we are so we
            # can rewind if that speculative parse fails.
            index = self._index
joins: t.Optional[list] = list(self._parse_joins()) 3520 3521 if joins and self._match(TokenType.ON): 3522 kwargs["on"] = self._parse_assignment() 3523 elif joins and self._match(TokenType.USING): 3524 kwargs["using"] = self._parse_using_identifiers() 3525 else: 3526 joins = None 3527 self._retreat(index) 3528 3529 kwargs["this"].set("joins", joins if joins else None) 3530 3531 comments = [c for token in (method, side, kind) if token for c in token.comments] 3532 return self.expression(exp.Join, comments=comments, **kwargs) 3533 3534 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3535 this = self._parse_assignment() 3536 3537 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3538 return this 3539 3540 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3541 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3542 3543 return this 3544 3545 def _parse_index_params(self) -> exp.IndexParameters: 3546 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3547 3548 if self._match(TokenType.L_PAREN, advance=False): 3549 columns = self._parse_wrapped_csv(self._parse_with_operator) 3550 else: 3551 columns = None 3552 3553 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3554 partition_by = self._parse_partition_by() 3555 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3556 tablespace = ( 3557 self._parse_var(any_token=True) 3558 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3559 else None 3560 ) 3561 where = self._parse_where() 3562 3563 on = self._parse_field() if self._match(TokenType.ON) else None 3564 3565 return self.expression( 3566 exp.IndexParameters, 3567 using=using, 3568 columns=columns, 3569 include=include, 3570 partition_by=partition_by, 3571 where=where, 3572 with_storage=with_storage, 3573 tablespace=tablespace, 3574 on=on, 3575 ) 3576 3577 def _parse_index( 3578 self, index: 
t.Optional[exp.Expression] = None, anonymous: bool = False 3579 ) -> t.Optional[exp.Index]: 3580 if index or anonymous: 3581 unique = None 3582 primary = None 3583 amp = None 3584 3585 self._match(TokenType.ON) 3586 self._match(TokenType.TABLE) # hive 3587 table = self._parse_table_parts(schema=True) 3588 else: 3589 unique = self._match(TokenType.UNIQUE) 3590 primary = self._match_text_seq("PRIMARY") 3591 amp = self._match_text_seq("AMP") 3592 3593 if not self._match(TokenType.INDEX): 3594 return None 3595 3596 index = self._parse_id_var() 3597 table = None 3598 3599 params = self._parse_index_params() 3600 3601 return self.expression( 3602 exp.Index, 3603 this=index, 3604 table=table, 3605 unique=unique, 3606 primary=primary, 3607 amp=amp, 3608 params=params, 3609 ) 3610 3611 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3612 hints: t.List[exp.Expression] = [] 3613 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3614 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3615 hints.append( 3616 self.expression( 3617 exp.WithTableHint, 3618 expressions=self._parse_csv( 3619 lambda: self._parse_function() or self._parse_var(any_token=True) 3620 ), 3621 ) 3622 ) 3623 self._match_r_paren() 3624 else: 3625 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3626 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3627 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3628 3629 self._match_set((TokenType.INDEX, TokenType.KEY)) 3630 if self._match(TokenType.FOR): 3631 hint.set("target", self._advance_any() and self._prev.text.upper()) 3632 3633 hint.set("expressions", self._parse_wrapped_id_vars()) 3634 hints.append(hint) 3635 3636 return hints or None 3637 3638 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3639 return ( 3640 (not schema and self._parse_function(optional_parens=False)) 3641 or self._parse_id_var(any_token=False) 3642 or 
self._parse_string_as_identifier() 3643 or self._parse_placeholder() 3644 ) 3645 3646 def _parse_table_parts( 3647 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3648 ) -> exp.Table: 3649 catalog = None 3650 db = None 3651 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3652 3653 while self._match(TokenType.DOT): 3654 if catalog: 3655 # This allows nesting the table in arbitrarily many dot expressions if needed 3656 table = self.expression( 3657 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3658 ) 3659 else: 3660 catalog = db 3661 db = table 3662 # "" used for tsql FROM a..b case 3663 table = self._parse_table_part(schema=schema) or "" 3664 3665 if ( 3666 wildcard 3667 and self._is_connected() 3668 and (isinstance(table, exp.Identifier) or not table) 3669 and self._match(TokenType.STAR) 3670 ): 3671 if isinstance(table, exp.Identifier): 3672 table.args["this"] += "*" 3673 else: 3674 table = exp.Identifier(this="*") 3675 3676 # We bubble up comments from the Identifier to the Table 3677 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3678 3679 if is_db_reference: 3680 catalog = db 3681 db = table 3682 table = None 3683 3684 if not table and not is_db_reference: 3685 self.raise_error(f"Expected table name but got {self._curr}") 3686 if not db and is_db_reference: 3687 self.raise_error(f"Expected database name but got {self._curr}") 3688 3689 table = self.expression( 3690 exp.Table, 3691 comments=comments, 3692 this=table, 3693 db=db, 3694 catalog=catalog, 3695 ) 3696 3697 changes = self._parse_changes() 3698 if changes: 3699 table.set("changes", changes) 3700 3701 at_before = self._parse_historical_data() 3702 if at_before: 3703 table.set("when", at_before) 3704 3705 pivots = self._parse_pivots() 3706 if pivots: 3707 table.set("pivots", pivots) 3708 3709 return table 3710 3711 def _parse_table( 3712 self, 3713 schema: bool = False, 3714 joins: 
bool = False, 3715 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3716 parse_bracket: bool = False, 3717 is_db_reference: bool = False, 3718 parse_partition: bool = False, 3719 ) -> t.Optional[exp.Expression]: 3720 lateral = self._parse_lateral() 3721 if lateral: 3722 return lateral 3723 3724 unnest = self._parse_unnest() 3725 if unnest: 3726 return unnest 3727 3728 values = self._parse_derived_table_values() 3729 if values: 3730 return values 3731 3732 subquery = self._parse_select(table=True) 3733 if subquery: 3734 if not subquery.args.get("pivots"): 3735 subquery.set("pivots", self._parse_pivots()) 3736 return subquery 3737 3738 bracket = parse_bracket and self._parse_bracket(None) 3739 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3740 3741 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3742 self._parse_table 3743 ) 3744 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3745 3746 only = self._match(TokenType.ONLY) 3747 3748 this = t.cast( 3749 exp.Expression, 3750 bracket 3751 or rows_from 3752 or self._parse_bracket( 3753 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3754 ), 3755 ) 3756 3757 if only: 3758 this.set("only", only) 3759 3760 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3761 self._match_text_seq("*") 3762 3763 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3764 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3765 this.set("partition", self._parse_partition()) 3766 3767 if schema: 3768 return self._parse_schema(this=this) 3769 3770 version = self._parse_version() 3771 3772 if version: 3773 this.set("version", version) 3774 3775 if self.dialect.ALIAS_POST_TABLESAMPLE: 3776 this.set("sample", self._parse_table_sample()) 3777 3778 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3779 if alias: 3780 
this.set("alias", alias) 3781 3782 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3783 return self.expression( 3784 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3785 ) 3786 3787 this.set("hints", self._parse_table_hints()) 3788 3789 if not this.args.get("pivots"): 3790 this.set("pivots", self._parse_pivots()) 3791 3792 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3793 this.set("sample", self._parse_table_sample()) 3794 3795 if joins: 3796 for join in self._parse_joins(): 3797 this.append("joins", join) 3798 3799 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3800 this.set("ordinality", True) 3801 this.set("alias", self._parse_table_alias()) 3802 3803 return this 3804 3805 def _parse_version(self) -> t.Optional[exp.Version]: 3806 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3807 this = "TIMESTAMP" 3808 elif self._match(TokenType.VERSION_SNAPSHOT): 3809 this = "VERSION" 3810 else: 3811 return None 3812 3813 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3814 kind = self._prev.text.upper() 3815 start = self._parse_bitwise() 3816 self._match_texts(("TO", "AND")) 3817 end = self._parse_bitwise() 3818 expression: t.Optional[exp.Expression] = self.expression( 3819 exp.Tuple, expressions=[start, end] 3820 ) 3821 elif self._match_text_seq("CONTAINED", "IN"): 3822 kind = "CONTAINED IN" 3823 expression = self.expression( 3824 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3825 ) 3826 elif self._match(TokenType.ALL): 3827 kind = "ALL" 3828 expression = None 3829 else: 3830 self._match_text_seq("AS", "OF") 3831 kind = "AS OF" 3832 expression = self._parse_type() 3833 3834 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3835 3836 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3837 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3838 index = self._index 3839 historical_data = None 3840 if 
self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            # The kind keyword is only meaningful inside parentheses,
            # e.g. AT(TIMESTAMP => ...).
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a complete AT/BEFORE clause — undo any tokens consumed.
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        # Parses `CHANGES (INFORMATION => <var>)` optionally followed by two
        # historical-data clauses (the start and end of the change interval).
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In these dialects the unnest alias names the column, not the
                # table, so move it into the columns slot.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The surplus column alias names the ordinality/offset column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias,
offset=offset) 3901 3902 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3903 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3904 if not is_derived and not ( 3905 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3906 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3907 ): 3908 return None 3909 3910 expressions = self._parse_csv(self._parse_value) 3911 alias = self._parse_table_alias() 3912 3913 if is_derived: 3914 self._match_r_paren() 3915 3916 return self.expression( 3917 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3918 ) 3919 3920 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3921 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3922 as_modifier and self._match_text_seq("USING", "SAMPLE") 3923 ): 3924 return None 3925 3926 bucket_numerator = None 3927 bucket_denominator = None 3928 bucket_field = None 3929 percent = None 3930 size = None 3931 seed = None 3932 3933 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3934 matched_l_paren = self._match(TokenType.L_PAREN) 3935 3936 if self.TABLESAMPLE_CSV: 3937 num = None 3938 expressions = self._parse_csv(self._parse_primary) 3939 else: 3940 expressions = None 3941 num = ( 3942 self._parse_factor() 3943 if self._match(TokenType.NUMBER, advance=False) 3944 else self._parse_primary() or self._parse_placeholder() 3945 ) 3946 3947 if self._match_text_seq("BUCKET"): 3948 bucket_numerator = self._parse_number() 3949 self._match_text_seq("OUT", "OF") 3950 bucket_denominator = bucket_denominator = self._parse_number() 3951 self._match(TokenType.ON) 3952 bucket_field = self._parse_field() 3953 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3954 percent = num 3955 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3956 size = num 3957 else: 3958 percent = num 3959 3960 if matched_l_paren: 3961 self._match_r_paren() 3962 
3963 if self._match(TokenType.L_PAREN): 3964 method = self._parse_var(upper=True) 3965 seed = self._match(TokenType.COMMA) and self._parse_number() 3966 self._match_r_paren() 3967 elif self._match_texts(("SEED", "REPEATABLE")): 3968 seed = self._parse_wrapped(self._parse_number) 3969 3970 if not method and self.DEFAULT_SAMPLING_METHOD: 3971 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3972 3973 return self.expression( 3974 exp.TableSample, 3975 expressions=expressions, 3976 method=method, 3977 bucket_numerator=bucket_numerator, 3978 bucket_denominator=bucket_denominator, 3979 bucket_field=bucket_field, 3980 percent=percent, 3981 size=size, 3982 seed=seed, 3983 ) 3984 3985 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3986 return list(iter(self._parse_pivot, None)) or None 3987 3988 def _parse_joins(self) -> t.Iterator[exp.Join]: 3989 return iter(self._parse_join, None) 3990 3991 # https://duckdb.org/docs/sql/statements/pivot 3992 def _parse_simplified_pivot(self) -> exp.Pivot: 3993 def _parse_on() -> t.Optional[exp.Expression]: 3994 this = self._parse_bitwise() 3995 return self._parse_in(this) if self._match(TokenType.IN) else this 3996 3997 this = self._parse_table() 3998 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3999 using = self._match(TokenType.USING) and self._parse_csv( 4000 lambda: self._parse_alias(self._parse_function()) 4001 ) 4002 group = self._parse_group() 4003 return self.expression( 4004 exp.Pivot, this=this, expressions=expressions, using=using, group=group 4005 ) 4006 4007 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4008 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4009 this = self._parse_select_or_expression() 4010 4011 self._match(TokenType.ALIAS) 4012 alias = self._parse_bitwise() 4013 if alias: 4014 if isinstance(alias, exp.Column) and not alias.db: 4015 alias = alias.this 4016 return self.expression(exp.PivotAlias, this=this, alias=alias) 4017 4018 return this 4019 4020 value 
= self._parse_column() 4021 4022 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4023 self.raise_error("Expecting IN (") 4024 4025 if self._match(TokenType.ANY): 4026 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4027 else: 4028 exprs = self._parse_csv(_parse_aliased_expression) 4029 4030 self._match_r_paren() 4031 return self.expression(exp.In, this=value, expressions=exprs) 4032 4033 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4034 index = self._index 4035 include_nulls = None 4036 4037 if self._match(TokenType.PIVOT): 4038 unpivot = False 4039 elif self._match(TokenType.UNPIVOT): 4040 unpivot = True 4041 4042 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4043 if self._match_text_seq("INCLUDE", "NULLS"): 4044 include_nulls = True 4045 elif self._match_text_seq("EXCLUDE", "NULLS"): 4046 include_nulls = False 4047 else: 4048 return None 4049 4050 expressions = [] 4051 4052 if not self._match(TokenType.L_PAREN): 4053 self._retreat(index) 4054 return None 4055 4056 if unpivot: 4057 expressions = self._parse_csv(self._parse_column) 4058 else: 4059 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4060 4061 if not expressions: 4062 self.raise_error("Failed to parse PIVOT's aggregation list") 4063 4064 if not self._match(TokenType.FOR): 4065 self.raise_error("Expecting FOR") 4066 4067 field = self._parse_pivot_in() 4068 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4069 self._parse_bitwise 4070 ) 4071 4072 self._match_r_paren() 4073 4074 pivot = self.expression( 4075 exp.Pivot, 4076 expressions=expressions, 4077 field=field, 4078 unpivot=unpivot, 4079 include_nulls=include_nulls, 4080 default_on_null=default_on_null, 4081 ) 4082 4083 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4084 pivot.set("alias", self._parse_table_alias()) 4085 4086 if not unpivot: 4087 
names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4088 4089 columns: t.List[exp.Expression] = [] 4090 for fld in pivot.args["field"].expressions: 4091 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4092 for name in names: 4093 if self.PREFIXED_PIVOT_COLUMNS: 4094 name = f"{name}_{field_name}" if name else field_name 4095 else: 4096 name = f"{field_name}_{name}" if name else field_name 4097 4098 columns.append(exp.to_identifier(name)) 4099 4100 pivot.set("columns", columns) 4101 4102 return pivot 4103 4104 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4105 return [agg.alias for agg in aggregations] 4106 4107 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4108 if not skip_where_token and not self._match(TokenType.PREWHERE): 4109 return None 4110 4111 return self.expression( 4112 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4113 ) 4114 4115 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4116 if not skip_where_token and not self._match(TokenType.WHERE): 4117 return None 4118 4119 return self.expression( 4120 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4121 ) 4122 4123 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4124 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4125 return None 4126 4127 elements: t.Dict[str, t.Any] = defaultdict(list) 4128 4129 if self._match(TokenType.ALL): 4130 elements["all"] = True 4131 elif self._match(TokenType.DISTINCT): 4132 elements["all"] = False 4133 4134 while True: 4135 index = self._index 4136 4137 elements["expressions"].extend( 4138 self._parse_csv( 4139 lambda: None 4140 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4141 else self._parse_assignment() 4142 ) 4143 ) 4144 4145 before_with_index = self._index 4146 with_prefix = 
self._match(TokenType.WITH) 4147 4148 if self._match(TokenType.ROLLUP): 4149 elements["rollup"].append( 4150 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4151 ) 4152 elif self._match(TokenType.CUBE): 4153 elements["cube"].append( 4154 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4155 ) 4156 elif self._match(TokenType.GROUPING_SETS): 4157 elements["grouping_sets"].append( 4158 self.expression( 4159 exp.GroupingSets, 4160 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4161 ) 4162 ) 4163 elif self._match_text_seq("TOTALS"): 4164 elements["totals"] = True # type: ignore 4165 4166 if before_with_index <= self._index <= before_with_index + 1: 4167 self._retreat(before_with_index) 4168 break 4169 4170 if index == self._index: 4171 break 4172 4173 return self.expression(exp.Group, **elements) # type: ignore 4174 4175 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4176 return self.expression( 4177 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4178 ) 4179 4180 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4181 if self._match(TokenType.L_PAREN): 4182 grouping_set = self._parse_csv(self._parse_column) 4183 self._match_r_paren() 4184 return self.expression(exp.Tuple, expressions=grouping_set) 4185 4186 return self._parse_column() 4187 4188 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4189 if not skip_having_token and not self._match(TokenType.HAVING): 4190 return None 4191 return self.expression(exp.Having, this=self._parse_assignment()) 4192 4193 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4194 if not self._match(TokenType.QUALIFY): 4195 return None 4196 return self.expression(exp.Qualify, this=self._parse_assignment()) 4197 4198 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4199 if skip_start_token: 4200 start = None 4201 elif 
    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        """Parse `name [AS expr]`, producing an Alias when AS is present."""
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE(...) list (used with ORDER BY ... WITH FILL), or None."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort-style clause (e.g. SORT BY / CLUSTER BY) introduced by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...].

        When the SQL does not spell out a null ordering, an explicit `nulls_first`
        flag is derived from the dialect's NULL_ORDERING default.
        """
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # `(asc and False)` pins desc to False (not None) when ASC was explicit
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n, TOP n, or an ANSI FETCH clause; returns `this` if absent."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP (expr) allows an arbitrary term; bare TOP only takes a number
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT <offset>, <count>`
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause (optionally followed by ROW/ROWS); returns `this` if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a `BY <exprs>` tail (ClickHouse LIMIT ... BY); falsy when absent."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each with optional OF <tables> and NOWAIT / WAIT <n> / SKIP LOCKED.
        """
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True for NOWAIT, an expression for WAIT <n>, False for SKIP LOCKED
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold a chain of UNION/EXCEPT/INTERSECT operations onto `this` (left-associative)."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                # Fall back to the dialect default; None means it must be spelled out
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (ORDER BY, LIMIT, ...) from the right-most
                # operand up to the set operation itself
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse an expression with an optional trailing alias."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment-style operators (e.g. :=), the lowest-precedence expression level."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            # Unwrap a bare single-part column to its identifier on the LHS
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this
self.ASSIGNMENT: 4452 # This allows us to parse <non-identifier token> := <expr> 4453 this = exp.column( 4454 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4455 ) 4456 4457 while self._match_set(self.ASSIGNMENT): 4458 if isinstance(this, exp.Column) and len(this.parts) == 1: 4459 this = this.this 4460 4461 this = self.expression( 4462 self.ASSIGNMENT[self._prev.token_type], 4463 this=this, 4464 comments=self._prev_comments, 4465 expression=self._parse_assignment(), 4466 ) 4467 4468 return this 4469 4470 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4471 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4472 4473 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4474 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4475 4476 def _parse_equality(self) -> t.Optional[exp.Expression]: 4477 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4478 4479 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4480 return self._parse_tokens(self._parse_range, self.COMPARISON) 4481 4482 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4483 this = this or self._parse_bitwise() 4484 negate = self._match(TokenType.NOT) 4485 4486 if self._match_set(self.RANGE_PARSERS): 4487 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4488 if not expression: 4489 return this 4490 4491 this = expression 4492 elif self._match(TokenType.ISNULL): 4493 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4494 4495 # Postgres supports ISNULL and NOTNULL for conditions. 
4496 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4497 if self._match(TokenType.NOTNULL): 4498 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4499 this = self.expression(exp.Not, this=this) 4500 4501 if negate: 4502 this = self._negate_range(this) 4503 4504 if self._match(TokenType.IS): 4505 this = self._parse_is(this) 4506 4507 return this 4508 4509 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4510 if not this: 4511 return this 4512 4513 return self.expression(exp.Not, this=this) 4514 4515 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4516 index = self._index - 1 4517 negate = self._match(TokenType.NOT) 4518 4519 if self._match_text_seq("DISTINCT", "FROM"): 4520 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4521 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4522 4523 if self._match(TokenType.JSON): 4524 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4525 4526 if self._match_text_seq("WITH"): 4527 _with = True 4528 elif self._match_text_seq("WITHOUT"): 4529 _with = False 4530 else: 4531 _with = None 4532 4533 unique = self._match(TokenType.UNIQUE) 4534 self._match_text_seq("KEYS") 4535 expression: t.Optional[exp.Expression] = self.expression( 4536 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4537 ) 4538 else: 4539 expression = self._parse_primary() or self._parse_null() 4540 if not expression: 4541 self._retreat(index) 4542 return None 4543 4544 this = self.expression(exp.Is, this=this, expression=expression) 4545 return self.expression(exp.Not, this=this) if negate else this 4546 4547 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4548 unnest = self._parse_unnest(with_alias=False) 4549 if unnest: 4550 this = self.expression(exp.In, this=this, unnest=unnest) 4551 elif self._match_set((TokenType.L_PAREN, 
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `low AND high` after a BETWEEN token has already been consumed."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (used with LIKE); returns `this` if absent."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing toward `INTERVAL '<n>' <unit>`.

        Also recognizes interval spans (`<unit> TO <unit>`) and implicit/explicit
        sums of consecutive interval parts, returned as nested exp.Add nodes.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out (and give all tokens back) on a bare `IS` column, which would
        # otherwise swallow an IS predicate following the INTERVAL keyword
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Precedence level: bitwise operators, || concat (dialect-dependent), ??,
        and << / >> shifts tokenized as LT LT / GT GT pairs.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` -> COALESCE(a, b)
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Precedence level: additive operators (self.TERM), with COLLATE normalization."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Precedence level: multiplicative operators (self.FACTOR), incl. DIV handling."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-form DIV with no RHS wasn't an operator after all: give it back
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Precedence level: exponentiation operators (self.EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Precedence level: unary prefix operators, then AT TIME ZONE over a typed operand."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> <literal>` cast form, or fall back to a column.

        Uses _retreat-based backtracking: a parsed data type is only kept when the
        surrounding tokens confirm it really was a type, not a column name.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)
4761 # 4762 # In these cases, we don't really want to return the converted type, but instead retreat 4763 # and try to parse a Column or Identifier in the section below. 4764 if data_type.expressions and index2 - index > 1: 4765 self._retreat(index2) 4766 return self._parse_column_ops(data_type) 4767 4768 self._retreat(index) 4769 4770 if fallback_to_identifier: 4771 return self._parse_id_var() 4772 4773 this = self._parse_column() 4774 return this and self._parse_column_ops(this) 4775 4776 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4777 this = self._parse_type() 4778 if not this: 4779 return None 4780 4781 if isinstance(this, exp.Column) and not this.table: 4782 this = exp.var(this.name.upper()) 4783 4784 return self.expression( 4785 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4786 ) 4787 4788 def _parse_types( 4789 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4790 ) -> t.Optional[exp.Expression]: 4791 index = self._index 4792 4793 this: t.Optional[exp.Expression] = None 4794 prefix = self._match_text_seq("SYSUDTLIB", ".") 4795 4796 if not self._match_set(self.TYPE_TOKENS): 4797 identifier = allow_identifiers and self._parse_id_var( 4798 any_token=False, tokens=(TokenType.VAR,) 4799 ) 4800 if isinstance(identifier, exp.Identifier): 4801 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4802 4803 if len(tokens) != 1: 4804 self.raise_error("Unexpected identifier", self._prev) 4805 4806 if tokens[0].token_type in self.TYPE_TOKENS: 4807 self._prev = tokens[0] 4808 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4809 type_name = identifier.name 4810 4811 while self._match(TokenType.DOT): 4812 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4813 4814 this = exp.DataType.build(type_name, udt=True) 4815 else: 4816 self._retreat(self._index - 1) 4817 return None 4818 else: 4819 return None 4820 4821 type_token = self._prev.token_type 4822 4823 if 
type_token == TokenType.PSEUDO_TYPE: 4824 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4825 4826 if type_token == TokenType.OBJECT_IDENTIFIER: 4827 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4828 4829 # https://materialize.com/docs/sql/types/map/ 4830 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4831 key_type = self._parse_types( 4832 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4833 ) 4834 if not self._match(TokenType.FARROW): 4835 self._retreat(index) 4836 return None 4837 4838 value_type = self._parse_types( 4839 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4840 ) 4841 if not self._match(TokenType.R_BRACKET): 4842 self._retreat(index) 4843 return None 4844 4845 return exp.DataType( 4846 this=exp.DataType.Type.MAP, 4847 expressions=[key_type, value_type], 4848 nested=True, 4849 prefix=prefix, 4850 ) 4851 4852 nested = type_token in self.NESTED_TYPE_TOKENS 4853 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4854 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4855 expressions = None 4856 maybe_func = False 4857 4858 if self._match(TokenType.L_PAREN): 4859 if is_struct: 4860 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4861 elif nested: 4862 expressions = self._parse_csv( 4863 lambda: self._parse_types( 4864 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4865 ) 4866 ) 4867 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4868 this = expressions[0] 4869 this.set("nullable", True) 4870 self._match_r_paren() 4871 return this 4872 elif type_token in self.ENUM_TYPE_TOKENS: 4873 expressions = self._parse_csv(self._parse_equality) 4874 elif is_aggregate: 4875 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4876 any_token=False, tokens=(TokenType.VAR,) 4877 ) 4878 if not func_or_ident or not self._match(TokenType.COMMA): 
4879 return None 4880 expressions = self._parse_csv( 4881 lambda: self._parse_types( 4882 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4883 ) 4884 ) 4885 expressions.insert(0, func_or_ident) 4886 else: 4887 expressions = self._parse_csv(self._parse_type_size) 4888 4889 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4890 if type_token == TokenType.VECTOR and len(expressions) == 2: 4891 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4892 4893 if not expressions or not self._match(TokenType.R_PAREN): 4894 self._retreat(index) 4895 return None 4896 4897 maybe_func = True 4898 4899 values: t.Optional[t.List[exp.Expression]] = None 4900 4901 if nested and self._match(TokenType.LT): 4902 if is_struct: 4903 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4904 else: 4905 expressions = self._parse_csv( 4906 lambda: self._parse_types( 4907 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4908 ) 4909 ) 4910 4911 if not self._match(TokenType.GT): 4912 self.raise_error("Expecting >") 4913 4914 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4915 values = self._parse_csv(self._parse_assignment) 4916 if not values and is_struct: 4917 values = None 4918 self._retreat(self._index - 1) 4919 else: 4920 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4921 4922 if type_token in self.TIMESTAMPS: 4923 if self._match_text_seq("WITH", "TIME", "ZONE"): 4924 maybe_func = False 4925 tz_type = ( 4926 exp.DataType.Type.TIMETZ 4927 if type_token in self.TIMES 4928 else exp.DataType.Type.TIMESTAMPTZ 4929 ) 4930 this = exp.DataType(this=tz_type, expressions=expressions) 4931 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4932 maybe_func = False 4933 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4934 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4935 maybe_func = False 4936 elif 
type_token == TokenType.INTERVAL: 4937 unit = self._parse_var(upper=True) 4938 if unit: 4939 if self._match_text_seq("TO"): 4940 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4941 4942 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4943 else: 4944 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4945 4946 if maybe_func and check_func: 4947 index2 = self._index 4948 peek = self._parse_string() 4949 4950 if not peek: 4951 self._retreat(index) 4952 return None 4953 4954 self._retreat(index2) 4955 4956 if not this: 4957 if self._match_text_seq("UNSIGNED"): 4958 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4959 if not unsigned_type_token: 4960 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4961 4962 type_token = unsigned_type_token or type_token 4963 4964 this = exp.DataType( 4965 this=exp.DataType.Type[type_token.value], 4966 expressions=expressions, 4967 nested=nested, 4968 prefix=prefix, 4969 ) 4970 4971 # Empty arrays/structs are allowed 4972 if values is not None: 4973 cls = exp.Struct if is_struct else exp.Array 4974 this = exp.cast(cls(expressions=values), this, copy=False) 4975 4976 elif expressions: 4977 this.set("expressions", expressions) 4978 4979 # https://materialize.com/docs/sql/types/list/#type-name 4980 while self._match(TokenType.LIST): 4981 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4982 4983 index = self._index 4984 4985 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4986 matched_array = self._match(TokenType.ARRAY) 4987 4988 while self._curr: 4989 datatype_token = self._prev.token_type 4990 matched_l_bracket = self._match(TokenType.L_BRACKET) 4991 if not matched_l_bracket and not matched_array: 4992 break 4993 4994 matched_array = False 4995 values = self._parse_csv(self._parse_assignment) or None 4996 if ( 4997 values 4998 and not schema 4999 and ( 5000 
not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5001 ) 5002 ): 5003 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5004 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5005 self._retreat(index) 5006 break 5007 5008 this = exp.DataType( 5009 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5010 ) 5011 self._match(TokenType.R_BRACKET) 5012 5013 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5014 converter = self.TYPE_CONVERTERS.get(this.this) 5015 if converter: 5016 this = converter(t.cast(exp.DataType, this)) 5017 5018 return this 5019 5020 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5021 index = self._index 5022 5023 if ( 5024 self._curr 5025 and self._next 5026 and self._curr.token_type in self.TYPE_TOKENS 5027 and self._next.token_type in self.TYPE_TOKENS 5028 ): 5029 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5030 # type token. 
    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix; returns `this` unchanged if absent."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed) column reference with its trailing operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer-join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse the base of a column reference, wrapping a bare Identifier in a Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES without a following ( is just an identifier here
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path` VARIANT extraction into a JSONExtract.

        Trailing :: casts bind looser than the : extraction, so casts collected
        along the path are re-applied around the final JSONExtract node.
        """
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Recover the raw path text up to (but excluding) the :: token
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the target type of a :: cast."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, :: casts, brackets, ...) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the name parts: the previous column name becomes the table, etc.
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        # Parses a primary expression: literals, parenthesized expressions/subqueries, tuples
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated (SQL standard behavior)
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # A leading-dot decimal, e.g. `.5` -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                # `()` -> empty tuple
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Parses a field: a primary, a function call, or an identifier, in an order that
        # depends on whether anonymous function parsing takes precedence
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """
        Parses a function call at the current token position.

        Args:
            functions: optional override of the name -> builder mapping (defaults to FUNCTIONS).
            anonymous: if True, always produce an exp.Anonymous instead of a typed function.
            optional_parens: if True, allow functions callable without parentheses.
            any_token: if True, allow any non-reserved token as a function name.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows, so only parenthesis-free functions (e.g. CURRENT_DATE) qualify
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(SELECT ...)
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders may optionally accept the dialect; inspect the signature once here
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        # Hook for dialects to convert positional args into PropertyEQ; identity by default
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        # Normalizes key-value style arguments (aliases, assignments) into PropertyEQ nodes
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Keys are identifiers, not columns; unwrap the Column
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed
    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        # The body of a user-defined function is an arbitrary statement
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # A UDF parameter is an identifier optionally followed by a type/constraints
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        # Parses a possibly-qualified UDF name and its optional parameter list
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL's `_utf8'abc'` charset introducer; falls back to a plain identifier
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        # Parses `@@[kind.]name` style session parameters
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # Tries to parse a lambda, e.g. `(x, y) -> x + y`; otherwise falls back to a
        # DISTINCT list or a regular select/expression (rewinding the token cursor)
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse as an ordinary argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses a parenthesized column/constraint list into an exp.Schema, if present
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse's `col ALIAS expr` / `col MATERIALIZED expr`
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Nothing column-def-like followed; return the bare identifier unchanged
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        # AUTO_INCREMENT optionally followed by `(start, increment)` or `START n INCREMENT n`
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            # Unconsume the AUTO token
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS with either a parenthesized list or a single expression
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        # GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (expr)}
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # System-versioned tables: GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): the parenthesized part is an expression
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Positional form, e.g. IDENTITY(1, 1)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # Handles NOT NULL / NOT CASESPECIFIC / NOT FOR REPLICATION constraints
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Don't mistake a procedure-level WITH option for a constraint keyword
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # A named CONSTRAINT wraps one or more unnamed constraint bodies
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        # UNIQUE [KEY] [NULLS NOT DISTINCT] [(cols)] [USING index_type] [ON CONFLICT ...]
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        # Collects trailing key constraint options as raw strings, e.g. "ON DELETE CASCADE"
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES table [(cols)] [options]; `match=False` skips the keyword check
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # Maps "delete"/"update" to their referential action, e.g. {"delete": "CASCADE"}
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (start_col, end_col)
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # Column-level PRIMARY KEY constraint (no column list)
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses `[...]` (array/subscript) and `{...}` (struct/map/ODBC literal) suffixes
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Subscript access; normalize indices for the dialect's index base
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Brackets can chain, e.g. x[0][1]
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Parses an array slice suffix, e.g. `x[1:2]`
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        # Parses CASE [operand] WHEN ... THEN ... [ELSE ...] END
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # `ELSE interval END` was misparsed: "END" was consumed as the interval unit
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Handles both IF(cond, true, false) and IF cond THEN ... [ELSE ...] END forms
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is a command in some dialects (e.g. IF EXISTS ...)
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR sequence [OVER (ORDER BY ...)]
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(unit FROM expr); some dialects use a comma instead of FROM
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        # GAP_FILL(TABLE t, ...) -- the first argument is a table reference
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        # CAST(expr AS type [FORMAT fmt]); `strict` selects Cast vs TryCast
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form casts to a type given as a string
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal cast with FORMAT becomes an explicit string-to-date/time conversion
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -> treat as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        # Parses STRING_AGG / LISTAGG variants into a GroupConcat node
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )
    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type)
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search values must also match when both sides are NULL,
                # mirroring DECODE's NULL-safe equality semantics
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] key [:|VALUE] value, as used by JSON_OBJECT and friends
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in FORMAT JSON if the clause follows; otherwise a no-op
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the
        # opposite order (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
    def _parse_json_object(self, agg=False):
        """Parse the body of JSON_OBJECT / JSON_OBJECTAGG (shared grammar)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # Optional trailing KEYS of [WITH | WITHOUT] UNIQUE [KEYS]
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside JSON_TABLE's COLUMNS clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a JSON_TABLE COLUMNS (...) schema."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the body of a JSON_TABLE(...) call."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH (cols) AGAINST (expr [modifier]) full-text predicate."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL's OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS-style calls; argument order varies by dialect."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse BigQuery's PREDICT(MODEL ..., TABLE ... [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM: the start position defaults to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <target>) puts the removal pattern first
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause into its list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse `name AS (window spec)` within a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` if followed by IGNORE NULLS / RESPECT NULLS; otherwise return it as-is."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing HAVING MAX/MIN clause into an exp.HavingMax wrapper."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the FILTER / WITHIN GROUP / OVER decorations that can follow a function call."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Re-anchor the IGNORE/RESPECT NULLS wrapper around the whole aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: a reference to a named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound into its value and side (PRECEDING/FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] alias (or parenthesized alias list) following `this`."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression =
self._parse_identifier() 6581 if not expression and ( 6582 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6583 ): 6584 quoted = self._prev.token_type == TokenType.STRING 6585 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6586 6587 return expression 6588 6589 def _parse_string(self) -> t.Optional[exp.Expression]: 6590 if self._match_set(self.STRING_PARSERS): 6591 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6592 return self._parse_placeholder() 6593 6594 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6595 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6596 6597 def _parse_number(self) -> t.Optional[exp.Expression]: 6598 if self._match_set(self.NUMERIC_PARSERS): 6599 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6600 return self._parse_placeholder() 6601 6602 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6603 if self._match(TokenType.IDENTIFIER): 6604 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6605 return self._parse_placeholder() 6606 6607 def _parse_var( 6608 self, 6609 any_token: bool = False, 6610 tokens: t.Optional[t.Collection[TokenType]] = None, 6611 upper: bool = False, 6612 ) -> t.Optional[exp.Expression]: 6613 if ( 6614 (any_token and self._advance_any()) 6615 or self._match(TokenType.VAR) 6616 or (self._match_set(tokens) if tokens else False) 6617 ): 6618 return self.expression( 6619 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6620 ) 6621 return self._parse_placeholder() 6622 6623 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6624 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6625 self._advance() 6626 return self._prev 6627 return None 6628 6629 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6630 
return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6631 6632 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6633 return self._parse_primary() or self._parse_var(any_token=True) 6634 6635 def _parse_null(self) -> t.Optional[exp.Expression]: 6636 if self._match_set(self.NULL_TOKENS): 6637 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6638 return self._parse_placeholder() 6639 6640 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6641 if self._match(TokenType.TRUE): 6642 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6643 if self._match(TokenType.FALSE): 6644 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6645 return self._parse_placeholder() 6646 6647 def _parse_star(self) -> t.Optional[exp.Expression]: 6648 if self._match(TokenType.STAR): 6649 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6650 return self._parse_placeholder() 6651 6652 def _parse_parameter(self) -> exp.Parameter: 6653 this = self._parse_identifier() or self._parse_primary_or_var() 6654 return self.expression(exp.Parameter, this=this) 6655 6656 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6657 if self._match_set(self.PLACEHOLDER_PARSERS): 6658 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6659 if placeholder: 6660 return placeholder 6661 self._advance(-1) 6662 return None 6663 6664 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6665 if not self._match_texts(keywords): 6666 return None 6667 if self._match(TokenType.L_PAREN, advance=False): 6668 return self._parse_wrapped_csv(self._parse_expression) 6669 6670 expression = self._parse_expression() 6671 return [expression] if expression else None 6672 6673 def _parse_csv( 6674 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6675 ) -> t.List[exp.Expression]: 6676 parse_result = parse_method() 6677 items = [parse_result] if parse_result is not None else [] 6678 
6679 while self._match(sep): 6680 self._add_comments(parse_result) 6681 parse_result = parse_method() 6682 if parse_result is not None: 6683 items.append(parse_result) 6684 6685 return items 6686 6687 def _parse_tokens( 6688 self, parse_method: t.Callable, expressions: t.Dict 6689 ) -> t.Optional[exp.Expression]: 6690 this = parse_method() 6691 6692 while self._match_set(expressions): 6693 this = self.expression( 6694 expressions[self._prev.token_type], 6695 this=this, 6696 comments=self._prev_comments, 6697 expression=parse_method(), 6698 ) 6699 6700 return this 6701 6702 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6703 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6704 6705 def _parse_wrapped_csv( 6706 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6707 ) -> t.List[exp.Expression]: 6708 return self._parse_wrapped( 6709 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6710 ) 6711 6712 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6713 wrapped = self._match(TokenType.L_PAREN) 6714 if not wrapped and not optional: 6715 self.raise_error("Expecting (") 6716 parse_result = parse_method() 6717 if wrapped: 6718 self._match_r_paren() 6719 return parse_result 6720 6721 def _parse_expressions(self) -> t.List[exp.Expression]: 6722 return self._parse_csv(self._parse_expression) 6723 6724 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6725 return self._parse_select() or self._parse_set_operations( 6726 self._parse_expression() if alias else self._parse_assignment() 6727 ) 6728 6729 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6730 return self._parse_query_modifiers( 6731 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6732 ) 6733 6734 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6735 this = None 6736 if 
self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            # Each mode is a run of VAR tokens, e.g. "READ ONLY"; modes are comma-separated
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK, including savepoints and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name-or-string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP [COLUMN] action; defaults the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION (...)[, PARTITION (...)]."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ALTER [COLUMN] action."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallthrough: ALTER COLUMN ... [SET DATA] [TYPE] <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift's ALTER DISTSTYLE action."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift's ALTER [COMPOUND] SORTKEY action."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP actions (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) ->
t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN old TO new | TO new_name]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET ..."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement, falling back to a generic Command on failure."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build an exp.Alter if all tokens were consumed; otherwise fall through
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... [RETURNING ...]."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY SOURCE|TARGET] THEN ... clauses of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via dialect-specific sub-parsers, else as an opaque command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` (or `name TO value`) item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
assignment_delimiter = self._match_texts(("=", "TO")) 7077 7078 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7079 self._retreat(index) 7080 return None 7081 7082 right = self._parse_statement() or self._parse_id_var() 7083 if isinstance(right, (exp.Column, exp.Identifier)): 7084 right = exp.var(right.name) 7085 7086 this = self.expression(exp.EQ, this=left, expression=right) 7087 return self.expression(exp.SetItem, this=this, kind=kind) 7088 7089 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7090 self._match_text_seq("TRANSACTION") 7091 characteristics = self._parse_csv( 7092 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7093 ) 7094 return self.expression( 7095 exp.SetItem, 7096 expressions=characteristics, 7097 kind="TRANSACTION", 7098 **{"global": global_}, # type: ignore 7099 ) 7100 7101 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7102 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7103 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7104 7105 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7106 index = self._index 7107 set_ = self.expression( 7108 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7109 ) 7110 7111 if self._curr: 7112 self._retreat(index) 7113 return self._parse_as_command(self._prev) 7114 7115 return set_ 7116 7117 def _parse_var_from_options( 7118 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7119 ) -> t.Optional[exp.Var]: 7120 start = self._curr 7121 if not start: 7122 return None 7123 7124 option = start.text.upper() 7125 continuations = options.get(option) 7126 7127 index = self._index 7128 self._advance() 7129 for keywords in continuations or []: 7130 if isinstance(keywords, str): 7131 keywords = (keywords,) 7132 7133 if self._match_text_seq(*keywords): 7134 option = f"{option} {' '.join(keywords)}" 7135 break 
7136 else: 7137 if continuations or continuations is None: 7138 if raise_unmatched: 7139 self.raise_error(f"Unknown option {option}") 7140 7141 self._retreat(index) 7142 return None 7143 7144 return exp.var(option) 7145 7146 def _parse_as_command(self, start: Token) -> exp.Command: 7147 while self._curr: 7148 self._advance() 7149 text = self._find_sql(start, self._prev) 7150 size = len(start.text) 7151 self._warn_unsupported() 7152 return exp.Command(this=text[:size], expression=text[size:]) 7153 7154 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7155 settings = [] 7156 7157 self._match_l_paren() 7158 kind = self._parse_id_var() 7159 7160 if self._match(TokenType.L_PAREN): 7161 while True: 7162 key = self._parse_id_var() 7163 value = self._parse_primary() 7164 if not key and value is None: 7165 break 7166 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7167 self._match(TokenType.R_PAREN) 7168 7169 self._match_r_paren() 7170 7171 return self.expression( 7172 exp.DictProperty, 7173 this=this, 7174 kind=kind.this if kind else None, 7175 settings=settings, 7176 ) 7177 7178 def _parse_dict_range(self, this: str) -> exp.DictRange: 7179 self._match_l_paren() 7180 has_min = self._match_text_seq("MIN") 7181 if has_min: 7182 min = self._parse_var() or self._parse_primary() 7183 self._match_text_seq("MAX") 7184 max = self._parse_var() or self._parse_primary() 7185 else: 7186 max = self._parse_var() or self._parse_primary() 7187 min = exp.Literal.number(0) 7188 self._match_r_paren() 7189 return self.expression(exp.DictRange, this=this, min=min, max=max) 7190 7191 def _parse_comprehension( 7192 self, this: t.Optional[exp.Expression] 7193 ) -> t.Optional[exp.Comprehension]: 7194 index = self._index 7195 expression = self._parse_column() 7196 if not self._match(TokenType.IN): 7197 self._retreat(index - 1) 7198 return None 7199 iterator = self._parse_column() 7200 condition = self._parse_assignment() if self._match_text_seq("IF") else 
None 7201 return self.expression( 7202 exp.Comprehension, 7203 this=this, 7204 expression=expression, 7205 iterator=iterator, 7206 condition=condition, 7207 ) 7208 7209 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7210 if self._match(TokenType.HEREDOC_STRING): 7211 return self.expression(exp.Heredoc, this=self._prev.text) 7212 7213 if not self._match_text_seq("$"): 7214 return None 7215 7216 tags = ["$"] 7217 tag_text = None 7218 7219 if self._is_connected(): 7220 self._advance() 7221 tags.append(self._prev.text.upper()) 7222 else: 7223 self.raise_error("No closing $ found") 7224 7225 if tags[-1] != "$": 7226 if self._is_connected() and self._match_text_seq("$"): 7227 tag_text = tags[-1] 7228 tags.append("$") 7229 else: 7230 self.raise_error("No closing $ found") 7231 7232 heredoc_start = self._curr 7233 7234 while self._curr: 7235 if self._match_text_seq(*tags, advance=False): 7236 this = self._find_sql(heredoc_start, self._prev) 7237 self._advance(len(tags)) 7238 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7239 7240 self._advance() 7241 7242 self.raise_error(f"No closing {''.join(tags)} found") 7243 return None 7244 7245 def _find_parser( 7246 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7247 ) -> t.Optional[t.Callable]: 7248 if not self._curr: 7249 return None 7250 7251 index = self._index 7252 this = [] 7253 while True: 7254 # The current token might be multiple words 7255 curr = self._curr.text.upper() 7256 key = curr.split(" ") 7257 this.append(curr) 7258 7259 self._advance() 7260 result, trie = in_trie(trie, key) 7261 if result == TrieResult.FAILED: 7262 break 7263 7264 if result == TrieResult.EXISTS: 7265 subparser = parsers[" ".join(this)] 7266 return subparser 7267 7268 self._retreat(index) 7269 return None 7270 7271 def _match(self, token_type, advance=True, expression=None): 7272 if not self._curr: 7273 return None 7274 7275 if self._curr.token_type == token_type: 7276 if advance: 7277 self._advance() 7278 
self._add_comments(expression) 7279 return True 7280 7281 return None 7282 7283 def _match_set(self, types, advance=True): 7284 if not self._curr: 7285 return None 7286 7287 if self._curr.token_type in types: 7288 if advance: 7289 self._advance() 7290 return True 7291 7292 return None 7293 7294 def _match_pair(self, token_type_a, token_type_b, advance=True): 7295 if not self._curr or not self._next: 7296 return None 7297 7298 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7299 if advance: 7300 self._advance(2) 7301 return True 7302 7303 return None 7304 7305 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7306 if not self._match(TokenType.L_PAREN, expression=expression): 7307 self.raise_error("Expecting (") 7308 7309 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7310 if not self._match(TokenType.R_PAREN, expression=expression): 7311 self.raise_error("Expecting )") 7312 7313 def _match_texts(self, texts, advance=True): 7314 if ( 7315 self._curr 7316 and self._curr.token_type != TokenType.STRING 7317 and self._curr.text.upper() in texts 7318 ): 7319 if advance: 7320 self._advance() 7321 return True 7322 return None 7323 7324 def _match_text_seq(self, *texts, advance=True): 7325 index = self._index 7326 for text in texts: 7327 if ( 7328 self._curr 7329 and self._curr.token_type != TokenType.STRING 7330 and self._curr.text.upper() == text 7331 ): 7332 self._advance() 7333 else: 7334 self._retreat(index) 7335 return None 7336 7337 if not advance: 7338 self._retreat(index) 7339 7340 return True 7341 7342 def _replace_lambda( 7343 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7344 ) -> t.Optional[exp.Expression]: 7345 if not node: 7346 return node 7347 7348 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7349 7350 for column in node.find_all(exp.Column): 7351 typ = lambda_types.get(column.parts[0].name) 7352 if typ 
is not None: 7353 dot_or_id = column.to_dot() if column.table else column.this 7354 7355 if typ: 7356 dot_or_id = self.expression( 7357 exp.Cast, 7358 this=dot_or_id, 7359 to=typ, 7360 ) 7361 7362 parent = column.parent 7363 7364 while isinstance(parent, exp.Dot): 7365 if not isinstance(parent.parent, exp.Dot): 7366 parent.replace(dot_or_id) 7367 break 7368 parent = parent.parent 7369 else: 7370 if column is node: 7371 node = dot_or_id 7372 else: 7373 column.replace(dot_or_id) 7374 return node 7375 7376 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7377 start = self._prev 7378 7379 # Not to be confused with TRUNCATE(number, decimals) function call 7380 if self._match(TokenType.L_PAREN): 7381 self._retreat(self._index - 2) 7382 return self._parse_function() 7383 7384 # Clickhouse supports TRUNCATE DATABASE as well 7385 is_database = self._match(TokenType.DATABASE) 7386 7387 self._match(TokenType.TABLE) 7388 7389 exists = self._parse_exists(not_=False) 7390 7391 expressions = self._parse_csv( 7392 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7393 ) 7394 7395 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7396 7397 if self._match_text_seq("RESTART", "IDENTITY"): 7398 identity = "RESTART" 7399 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7400 identity = "CONTINUE" 7401 else: 7402 identity = None 7403 7404 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7405 option = self._prev.text 7406 else: 7407 option = None 7408 7409 partition = self._parse_partition() 7410 7411 # Fallback case 7412 if self._curr: 7413 return self._parse_as_command(start) 7414 7415 return self.expression( 7416 exp.TruncateTable, 7417 expressions=expressions, 7418 is_database=is_database, 7419 exists=exists, 7420 cluster=cluster, 7421 identity=identity, 7422 option=option, 7423 partition=partition, 7424 ) 7425 7426 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7427 
this = self._parse_ordered(self._parse_opclass) 7428 7429 if not self._match(TokenType.WITH): 7430 return this 7431 7432 op = self._parse_var(any_token=True) 7433 7434 return self.expression(exp.WithOperator, this=this, op=op) 7435 7436 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7437 self._match(TokenType.EQ) 7438 self._match(TokenType.L_PAREN) 7439 7440 opts: t.List[t.Optional[exp.Expression]] = [] 7441 while self._curr and not self._match(TokenType.R_PAREN): 7442 if self._match_text_seq("FORMAT_NAME", "="): 7443 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7444 # so we parse it separately to use _parse_field() 7445 prop = self.expression( 7446 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7447 ) 7448 opts.append(prop) 7449 else: 7450 opts.append(self._parse_property()) 7451 7452 self._match(TokenType.COMMA) 7453 7454 return opts 7455 7456 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7457 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7458 7459 options = [] 7460 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7461 option = self._parse_var(any_token=True) 7462 prev = self._prev.text.upper() 7463 7464 # Different dialects might separate options and values by white space, "=" and "AS" 7465 self._match(TokenType.EQ) 7466 self._match(TokenType.ALIAS) 7467 7468 param = self.expression(exp.CopyParameter, this=option) 7469 7470 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7471 TokenType.L_PAREN, advance=False 7472 ): 7473 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7474 param.set("expressions", self._parse_wrapped_options()) 7475 elif prev == "FILE_FORMAT": 7476 # T-SQL's external file format case 7477 param.set("expression", self._parse_field()) 7478 else: 7479 param.set("expression", self._parse_unquoted_field()) 7480 7481 options.append(param) 7482 self._match(sep) 7483 7484 return options 7485 7486 
    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse COPY credential clauses (Snowflake / Redshift style options)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a file location in a COPY statement; hook point for dialect overrides."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY statement; falls back to a raw Command on leftover tokens."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for the FROM (load) direction, False for the TO (unload) direction.
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(<string>[, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse `*` with optional EXCEPT/REPLACE/RENAME, or a COLUMNS(...) unpack."""
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one (possibly multi-word) privilege with an optional column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee, optionally prefixed by ROLE or GROUP."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse a GRANT statement; falls back to a raw Command when unparseable."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(<expr> PLACING <expr> FROM <expr> [FOR <expr>])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1393 def __init__( 1394 self, 1395 error_level: t.Optional[ErrorLevel] = None, 1396 error_message_context: int = 100, 1397 max_errors: int = 3, 1398 dialect: DialectType = None, 1399 ): 1400 from sqlglot.dialects import Dialect 1401 1402 self.error_level = error_level or ErrorLevel.IMMEDIATE 1403 self.error_message_context = error_message_context 1404 self.max_errors = max_errors 1405 self.dialect = Dialect.get_or_raise(dialect) 1406 self.reset()
1418 def parse( 1419 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1420 ) -> t.List[t.Optional[exp.Expression]]: 1421 """ 1422 Parses a list of tokens and returns a list of syntax trees, one tree 1423 per parsed SQL statement. 1424 1425 Args: 1426 raw_tokens: The list of tokens. 1427 sql: The original SQL string, used to produce helpful debug messages. 1428 1429 Returns: 1430 The list of the produced syntax trees. 1431 """ 1432 return self._parse( 1433 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1434 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1436 def parse_into( 1437 self, 1438 expression_types: exp.IntoType, 1439 raw_tokens: t.List[Token], 1440 sql: t.Optional[str] = None, 1441 ) -> t.List[t.Optional[exp.Expression]]: 1442 """ 1443 Parses a list of tokens into a given Expression type. If a collection of Expression 1444 types is given instead, this method will try to parse the token list into each one 1445 of them, stopping at the first for which the parsing succeeds. 1446 1447 Args: 1448 expression_types: The expression type(s) to try and parse the token list into. 1449 raw_tokens: The list of tokens. 1450 sql: The original SQL string, used to produce helpful debug messages. 1451 1452 Returns: 1453 The target Expression. 1454 """ 1455 errors = [] 1456 for expression_type in ensure_list(expression_types): 1457 parser = self.EXPRESSION_PARSERS.get(expression_type) 1458 if not parser: 1459 raise TypeError(f"No parser registered for {expression_type}") 1460 1461 try: 1462 return self._parse(parser, raw_tokens, sql) 1463 except ParseError as e: 1464 e.errors[0]["into_expression"] = expression_type 1465 errors.append(e) 1466 1467 raise ParseError( 1468 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1469 errors=merge_errors(errors), 1470 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1510 def check_errors(self) -> None: 1511 """Logs or raises any found errors, depending on the chosen error level setting.""" 1512 if self.error_level == ErrorLevel.WARN: 1513 for error in self.errors: 1514 logger.error(str(error)) 1515 elif self.error_level == ErrorLevel.RAISE and self.errors: 1516 raise ParseError( 1517 concat_messages(self.errors, self.max_errors), 1518 errors=merge_errors(self.errors), 1519 )
Logs or raises any found errors, depending on the chosen error level setting.
1521 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1522 """ 1523 Appends an error in the list of recorded errors or raises it, depending on the chosen 1524 error level setting. 1525 """ 1526 token = token or self._curr or self._prev or Token.string("") 1527 start = token.start 1528 end = token.end + 1 1529 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1530 highlight = self.sql[start:end] 1531 end_context = self.sql[end : end + self.error_message_context] 1532 1533 error = ParseError.new( 1534 f"{message}. Line {token.line}, Col: {token.col}.\n" 1535 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1536 description=message, 1537 line=token.line, 1538 col=token.col, 1539 start_context=start_context, 1540 highlight=highlight, 1541 end_context=end_context, 1542 ) 1543 1544 if self.error_level == ErrorLevel.IMMEDIATE: 1545 raise error 1546 1547 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1549 def expression( 1550 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1551 ) -> E: 1552 """ 1553 Creates a new, validated Expression. 1554 1555 Args: 1556 exp_class: The expression class to instantiate. 1557 comments: An optional list of comments to attach to the expression. 1558 kwargs: The arguments to set for the expression along with their respective values. 1559 1560 Returns: 1561 The target expression. 1562 """ 1563 instance = exp_class(**kwargs) 1564 instance.add_comments(comments) if comments else self._add_comments(instance) 1565 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1572 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1573 """ 1574 Validates an Expression, making sure that all its mandatory arguments are set. 1575 1576 Args: 1577 expression: The expression to validate. 1578 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1579 1580 Returns: 1581 The validated expression. 1582 """ 1583 if self.error_level != ErrorLevel.IGNORE: 1584 for error_message in expression.error_messages(args): 1585 self.raise_error(error_message) 1586 1587 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.