sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)

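# Illustrative usage sketch (not part of the library source): the builders above
# normalize dialect quirks at parse time, and the public `parse_one` entry point
# exercises them. Output shapes are indicative only and vary across versions:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT LOG(2, 64)").find(sqlglot.exp.Log)
#     Log(this=Literal(this=2, ...), expression=Literal(this=64, ...))  # base-first by default
#     >>> sqlglot.parse_one("SELECT LOWER(HEX(x))").find(sqlglot.exp.LowerHex)
#     LowerHex(this=Column(...))  # build_lower folded LOWER(HEX(..)) into one node
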
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

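    # Illustrative sketch (not part of the library source): FUNCTIONS keys are
    # uppercase SQL names dispatched to builder callables, and NO_PAREN_FUNCTIONS
    # lets bare keywords parse as function expressions:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("SELECT CURRENT_DATE").selects[0])
    #     <class 'sqlglot.expressions.CurrentDate'>
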
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

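    # Illustrative sketch (not part of the library source): the
    # EQUALITY/COMPARISON/BITWISE/TERM/FACTOR tables drive a precedence-climbing
    # parse, so FACTOR operators bind tighter than TERM operators, and LAMBDAS
    # handles arrow syntax inside function arguments. Reprs abbreviated and
    # version-dependent:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 + 2 * 3").selects[0]
    #     Add(this=Literal(this=1, ...), expression=Mul(...))
    #     >>> sqlglot.parse_one("SELECT FILTER(xs, x -> x > 0)").find(sqlglot.exp.Lambda)
    #     Lambda(this=GT(...), expressions=[Identifier(this=x, ...)])
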
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

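    # Illustrative sketch (not part of the library source): COLUMN_OPERATORS
    # covers postfix column syntax such as `::` casting and `->`/`->>` JSON
    # extraction, while STATEMENT_PARSERS dispatches on a statement's first token:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT x::INT")      # DCOLON -> exp.Cast (STRICT_CAST is True)
    #     >>> sqlglot.parse_one("UPDATE t SET a = 1") # TokenType.UPDATE -> _parse_update
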
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

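    # Illustrative sketch (not part of the library source): RANGE_PARSERS extends
    # an already-parsed left-hand side with predicates such as BETWEEN and IN:
    #
    #     >>> import sqlglot
    #     >>> sql = "SELECT * FROM t WHERE x BETWEEN 1 AND 10"
    #     >>> sqlglot.parse_one(sql).find(sqlglot.exp.Between)
    #     Between(this=Column(...), low=Literal(this=1, ...), high=Literal(this=10, ...))
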
883 "AUTO": lambda self: self._parse_auto_property(), 884 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 885 "BACKUP": lambda self: self.expression( 886 exp.BackupProperty, this=self._parse_var(any_token=True) 887 ), 888 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 889 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 890 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 891 "CHECKSUM": lambda self: self._parse_checksum(), 892 "CLUSTER BY": lambda self: self._parse_cluster(), 893 "CLUSTERED": lambda self: self._parse_clustered_by(), 894 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 895 exp.CollateProperty, **kwargs 896 ), 897 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 898 "CONTAINS": lambda self: self._parse_contains_property(), 899 "COPY": lambda self: self._parse_copy_property(), 900 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 901 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 902 "DEFINER": lambda self: self._parse_definer(), 903 "DETERMINISTIC": lambda self: self.expression( 904 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 905 ), 906 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 907 "DUPLICATE": lambda self: self._parse_duplicate(), 908 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 909 "DISTKEY": lambda self: self._parse_distkey(), 910 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 911 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 912 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 913 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 914 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 915 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 916 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 917 "FREESPACE": lambda self: self._parse_freespace(), 918 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 919 "HEAP": lambda self: self.expression(exp.HeapProperty), 920 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 921 "IMMUTABLE": lambda self: self.expression( 922 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 923 ), 924 "INHERITS": lambda self: self.expression( 925 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 926 ), 927 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 928 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 929 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 930 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 931 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 932 "LIKE": lambda self: self._parse_create_like(), 933 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 934 "LOCK": lambda self: self._parse_locking(), 935 "LOCKING": lambda self: self._parse_locking(), 936 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 937 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 938 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 939 "MODIFIES": lambda self: self._parse_modifies_property(), 940 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 941 "NO": 
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

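    # Illustrative sketch (not part of the library source): CONSTRAINT_PARSERS is
    # keyed by the leading keyword(s) of a column or table constraint inside a
    # schema definition. Output is indicative:
    #
    #     >>> import sqlglot
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, name TEXT NOT NULL)")
    #     >>> [c.sql() for c in ddl.find_all(sqlglot.exp.ColumnConstraint)]
    #     ['PRIMARY KEY', 'NOT NULL']
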
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

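    # Illustrative sketch (not part of the library source): FUNCTION_PARSERS
    # special-cases functions whose argument syntax is not a plain CSV list,
    # e.g. CAST(x AS INT) and EXTRACT(YEAR FROM d). Reprs abbreviated:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT CAST(x AS INT), EXTRACT(YEAR FROM d)").selects
    #     [Cast(this=Column(...), to=DataType(this=Type.INT, ...)), Extract(...)]
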
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

"FORMAT_OPTIONS", "CREDENTIAL"} 1276 1277 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1278 1279 ODBC_DATETIME_LITERALS = { 1280 "d": exp.Date, 1281 "t": exp.Time, 1282 "ts": exp.Timestamp, 1283 } 1284 1285 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1286 1287 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1288 1289 # The style options for the DESCRIBE statement 1290 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1291 1292 OPERATION_MODIFIERS: t.Set[str] = set() 1293 1294 STRICT_CAST = True 1295 1296 PREFIXED_PIVOT_COLUMNS = False 1297 IDENTIFY_PIVOT_STRINGS = False 1298 1299 LOG_DEFAULTS_TO_LN = False 1300 1301 # Whether ADD is present for each column added by ALTER TABLE 1302 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1303 1304 # Whether the table sample clause expects CSV syntax 1305 TABLESAMPLE_CSV = False 1306 1307 # The default method used for table sampling 1308 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1309 1310 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1311 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1312 1313 # Whether the TRIM function expects the characters to trim as its first argument 1314 TRIM_PATTERN_FIRST = False 1315 1316 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1317 STRING_ALIASES = False 1318 1319 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1320 MODIFIERS_ATTACHED_TO_SET_OP = True 1321 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1322 1323 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1324 NO_PAREN_IF_COMMANDS = True 1325 1326 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1327 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1328 1329 # Whether the `:` operator is used to extract a value from a VARIANT column 1330 COLON_IS_VARIANT_EXTRACT = False 1331 1332 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1333 # If this is True and '(' is not found, the keyword will be treated as an identifier 1334 VALUES_FOLLOWED_BY_PAREN = True 1335 1336 # Whether implicit unnesting is supported, e.g. 
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

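    # Minimal usage sketch (not part of the library source), assuming a standard
    # sqlglot install; `Dialect.get_or_raise`, `Dialect.tokenize` and
    # `Dialect.parser` are the public helpers used here:
    #
    #     >>> from sqlglot.dialects.dialect import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> tokens = dialect.tokenize("SELECT 1; SELECT 2")
    #     >>> [e.sql() for e in dialect.parser().parse(tokens)]
    #     ['SELECT 1', 'SELECT 2']
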
1425 """ 1426 errors = [] 1427 for expression_type in ensure_list(expression_types): 1428 parser = self.EXPRESSION_PARSERS.get(expression_type) 1429 if not parser: 1430 raise TypeError(f"No parser registered for {expression_type}") 1431 1432 try: 1433 return self._parse(parser, raw_tokens, sql) 1434 except ParseError as e: 1435 e.errors[0]["into_expression"] = expression_type 1436 errors.append(e) 1437 1438 raise ParseError( 1439 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1440 errors=merge_errors(errors), 1441 ) from errors[-1] 1442 1443 def _parse( 1444 self, 1445 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1446 raw_tokens: t.List[Token], 1447 sql: t.Optional[str] = None, 1448 ) -> t.List[t.Optional[exp.Expression]]: 1449 self.reset() 1450 self.sql = sql or "" 1451 1452 total = len(raw_tokens) 1453 chunks: t.List[t.List[Token]] = [[]] 1454 1455 for i, token in enumerate(raw_tokens): 1456 if token.token_type == TokenType.SEMICOLON: 1457 if token.comments: 1458 chunks.append([token]) 1459 1460 if i < total - 1: 1461 chunks.append([]) 1462 else: 1463 chunks[-1].append(token) 1464 1465 expressions = [] 1466 1467 for tokens in chunks: 1468 self._index = -1 1469 self._tokens = tokens 1470 self._advance() 1471 1472 expressions.append(parse_method(self)) 1473 1474 if self._index < len(self._tokens): 1475 self.raise_error("Invalid expression / Unexpected token") 1476 1477 self.check_errors() 1478 1479 return expressions 1480 1481 def check_errors(self) -> None: 1482 """Logs or raises any found errors, depending on the chosen error level setting.""" 1483 if self.error_level == ErrorLevel.WARN: 1484 for error in self.errors: 1485 logger.error(str(error)) 1486 elif self.error_level == ErrorLevel.RAISE and self.errors: 1487 raise ParseError( 1488 concat_messages(self.errors, self.max_errors), 1489 errors=merge_errors(self.errors), 1490 ) 1491 1492 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1493 """ 1494 Appends an error in the list of recorded errors or raises it, depending on the chosen 1495 error level setting. 1496 """ 1497 token = token or self._curr or self._prev or Token.string("") 1498 start = token.start 1499 end = token.end + 1 1500 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1501 highlight = self.sql[start:end] 1502 end_context = self.sql[end : end + self.error_message_context] 1503 1504 error = ParseError.new( 1505 f"{message}. Line {token.line}, Col: {token.col}.\n" 1506 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1507 description=message, 1508 line=token.line, 1509 col=token.col, 1510 start_context=start_context, 1511 highlight=highlight, 1512 end_context=end_context, 1513 ) 1514 1515 if self.error_level == ErrorLevel.IMMEDIATE: 1516 raise error 1517 1518 self.errors.append(error) 1519 1520 def expression( 1521 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1522 ) -> E: 1523 """ 1524 Creates a new, validated Expression. 1525 1526 Args: 1527 exp_class: The expression class to instantiate. 1528 comments: An optional list of comments to attach to the expression. 1529 kwargs: The arguments to set for the expression along with their respective values. 1530 1531 Returns: 1532 The target expression. 
1533 """ 1534 instance = exp_class(**kwargs) 1535 instance.add_comments(comments) if comments else self._add_comments(instance) 1536 return self.validate_expression(instance) 1537 1538 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1539 if expression and self._prev_comments: 1540 expression.add_comments(self._prev_comments) 1541 self._prev_comments = None 1542 1543 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1544 """ 1545 Validates an Expression, making sure that all its mandatory arguments are set. 1546 1547 Args: 1548 expression: The expression to validate. 1549 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1550 1551 Returns: 1552 The validated expression. 1553 """ 1554 if self.error_level != ErrorLevel.IGNORE: 1555 for error_message in expression.error_messages(args): 1556 self.raise_error(error_message) 1557 1558 return expression 1559 1560 def _find_sql(self, start: Token, end: Token) -> str: 1561 return self.sql[start.start : end.end + 1] 1562 1563 def _is_connected(self) -> bool: 1564 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1565 1566 def _advance(self, times: int = 1) -> None: 1567 self._index += times 1568 self._curr = seq_get(self._tokens, self._index) 1569 self._next = seq_get(self._tokens, self._index + 1) 1570 1571 if self._index > 0: 1572 self._prev = self._tokens[self._index - 1] 1573 self._prev_comments = self._prev.comments 1574 else: 1575 self._prev = None 1576 self._prev_comments = None 1577 1578 def _retreat(self, index: int) -> None: 1579 if index != self._index: 1580 self._advance(index - self._index) 1581 1582 def _warn_unsupported(self) -> None: 1583 if len(self._tokens) <= 1: 1584 return 1585 1586 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1587 # interested in emitting a warning for the one being currently processed. 1588 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1589 1590 logger.warning( 1591 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1592 ) 1593 1594 def _parse_command(self) -> exp.Command: 1595 self._warn_unsupported() 1596 return self.expression( 1597 exp.Command, 1598 comments=self._prev_comments, 1599 this=self._prev.text.upper(), 1600 expression=self._parse_string(), 1601 ) 1602 1603 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1604 """ 1605 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

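    # Illustrative sketch (not part of the library source): _parse_statement is
    # the per-statement entry point. It tries STATEMENT_PARSERS first, falls back
    # to the dialect's command tokens, and finally attempts a plain
    # expression/SELECT. A subclass could hook a new leading token in the same
    # style (the parser method below is hypothetical):
    #
    #     class MyParser(Parser):  # hypothetical subclass
    #         STATEMENT_PARSERS = {
    #             **Parser.STATEMENT_PARSERS,
    #             TokenType.SHOW: lambda self: self._parse_my_show(),  # hypothetical method
    #         }
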
self._prev 1715 temporary = self._match(TokenType.TEMPORARY) 1716 materialized = self._match_text_seq("MATERIALIZED") 1717 1718 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1719 if not kind: 1720 return self._parse_as_command(start) 1721 1722 concurrently = self._match_text_seq("CONCURRENTLY") 1723 if_exists = exists or self._parse_exists() 1724 table = self._parse_table_parts( 1725 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1726 ) 1727 1728 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1729 1730 if self._match(TokenType.L_PAREN, advance=False): 1731 expressions = self._parse_wrapped_csv(self._parse_types) 1732 else: 1733 expressions = None 1734 1735 return self.expression( 1736 exp.Drop, 1737 comments=start.comments, 1738 exists=if_exists, 1739 this=table, 1740 expressions=expressions, 1741 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1742 temporary=temporary, 1743 materialized=materialized, 1744 cascade=self._match_text_seq("CASCADE"), 1745 constraints=self._match_text_seq("CONSTRAINTS"), 1746 purge=self._match_text_seq("PURGE"), 1747 cluster=cluster, 1748 concurrently=concurrently, 1749 ) 1750 1751 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1752 return ( 1753 self._match_text_seq("IF") 1754 and (not not_ or self._match(TokenType.NOT)) 1755 and self._match(TokenType.EXISTS) 1756 ) 1757 1758 def _parse_create(self) -> exp.Create | exp.Command: 1759 # Note: this can't be None because we've matched a statement parser 1760 start = self._prev 1761 comments = self._prev_comments 1762 1763 replace = ( 1764 start.token_type == TokenType.REPLACE 1765 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1766 or self._match_pair(TokenType.OR, TokenType.ALTER) 1767 ) 1768 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1769 1770 unique = self._match(TokenType.UNIQUE) 1771 1772 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1773 clustered = True 1774 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1775 "COLUMNSTORE" 1776 ): 1777 clustered = False 1778 else: 1779 clustered = None 1780 1781 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1782 self._advance() 1783 1784 properties = None 1785 create_token = self._match_set(self.CREATABLES) and self._prev 1786 1787 if not create_token: 1788 # exp.Properties.Location.POST_CREATE 1789 properties = self._parse_properties() 1790 create_token = self._match_set(self.CREATABLES) and self._prev 1791 1792 if not properties or not create_token: 1793 return self._parse_as_command(start) 1794 1795 concurrently = self._match_text_seq("CONCURRENTLY") 1796 exists = self._parse_exists(not_=True) 1797 this = None 1798 expression: t.Optional[exp.Expression] = None 1799 indexes = None 1800 no_schema_binding = None 1801 begin = None 1802 end = None 1803 clone = None 1804 1805 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1806 nonlocal properties 1807 if properties and temp_props: 1808 properties.expressions.extend(temp_props.expressions) 1809 elif temp_props: 1810 properties = temp_props 1811 1812 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1813 this = self._parse_user_defined_function(kind=create_token.token_type) 1814 1815 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1816 extend_props(self._parse_properties()) 1817 1818 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1819 
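# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_drop folds the trailing modifiers (IF EXISTS, CASCADE, PURGE, ...)
# into a single exp.Drop node. A minimal check through parse_one; expected
# values are indicative and may vary across sqlglot versions.
import sqlglot

drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
print(type(drop).__name__)       # expected: Drop
print(drop.args.get("exists"))   # expected: True (set via _parse_exists)
print(drop.args.get("cascade"))  # expected: True
# --- end sketch ---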
extend_props(self._parse_properties()) 1820 1821 if not expression: 1822 if self._match(TokenType.COMMAND): 1823 expression = self._parse_as_command(self._prev) 1824 else: 1825 begin = self._match(TokenType.BEGIN) 1826 return_ = self._match_text_seq("RETURN") 1827 1828 if self._match(TokenType.STRING, advance=False): 1829 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1830 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1831 expression = self._parse_string() 1832 extend_props(self._parse_properties()) 1833 else: 1834 expression = self._parse_statement() 1835 1836 end = self._match_text_seq("END") 1837 1838 if return_: 1839 expression = self.expression(exp.Return, this=expression) 1840 elif create_token.token_type == TokenType.INDEX: 1841 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1842 if not self._match(TokenType.ON): 1843 index = self._parse_id_var() 1844 anonymous = False 1845 else: 1846 index = None 1847 anonymous = True 1848 1849 this = self._parse_index(index=index, anonymous=anonymous) 1850 elif create_token.token_type in self.DB_CREATABLES: 1851 table_parts = self._parse_table_parts( 1852 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1853 ) 1854 1855 # exp.Properties.Location.POST_NAME 1856 self._match(TokenType.COMMA) 1857 extend_props(self._parse_properties(before=True)) 1858 1859 this = self._parse_schema(this=table_parts) 1860 1861 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1862 extend_props(self._parse_properties()) 1863 1864 self._match(TokenType.ALIAS) 1865 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1866 # exp.Properties.Location.POST_ALIAS 1867 extend_props(self._parse_properties()) 1868 1869 if create_token.token_type == TokenType.SEQUENCE: 1870 expression = self._parse_types() 1871 extend_props(self._parse_properties()) 1872 else: 1873 expression = self._parse_ddl_select() 1874 1875 if create_token.token_type == TokenType.TABLE: 1876 # exp.Properties.Location.POST_EXPRESSION 1877 extend_props(self._parse_properties()) 1878 1879 indexes = [] 1880 while True: 1881 index = self._parse_index() 1882 1883 # exp.Properties.Location.POST_INDEX 1884 extend_props(self._parse_properties()) 1885 if not index: 1886 break 1887 else: 1888 self._match(TokenType.COMMA) 1889 indexes.append(index) 1890 elif create_token.token_type == TokenType.VIEW: 1891 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1892 no_schema_binding = True 1893 1894 shallow = self._match_text_seq("SHALLOW") 1895 1896 if self._match_texts(self.CLONE_KEYWORDS): 1897 copy = self._prev.text.lower() == "copy" 1898 clone = self.expression( 1899 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1900 ) 1901 1902 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1903 return self._parse_as_command(start) 1904 1905 create_kind_text = create_token.text.upper() 1906 return self.expression( 1907 exp.Create, 1908 comments=comments, 1909 this=this, 1910 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1911 replace=replace, 1912 refresh=refresh, 1913 unique=unique, 1914 expression=expression, 1915 exists=exists, 1916 properties=properties, 1917 indexes=indexes, 1918 no_schema_binding=no_schema_binding, 1919 begin=begin, 1920 end=end, 1921 clone=clone, 1922 concurrently=concurrently, 1923 clustered=clustered, 1924 ) 1925 1926 def 
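# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_create dispatches on the creatable kind and accumulates properties
# found at the various exp.Properties.Location points. For a view, the DDL
# SELECT lands in the "expression" slot of the resulting exp.Create:
import sqlglot

create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1 AS x")
print(create.args.get("kind"))            # expected: 'VIEW'
print(create.args.get("replace"))         # expected: True
print(type(create.expression).__name__)   # expected: Select
# --- end sketch ---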
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1927 seq = exp.SequenceProperties() 1928 1929 options = [] 1930 index = self._index 1931 1932 while self._curr: 1933 self._match(TokenType.COMMA) 1934 if self._match_text_seq("INCREMENT"): 1935 self._match_text_seq("BY") 1936 self._match_text_seq("=") 1937 seq.set("increment", self._parse_term()) 1938 elif self._match_text_seq("MINVALUE"): 1939 seq.set("minvalue", self._parse_term()) 1940 elif self._match_text_seq("MAXVALUE"): 1941 seq.set("maxvalue", self._parse_term()) 1942 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1943 self._match_text_seq("=") 1944 seq.set("start", self._parse_term()) 1945 elif self._match_text_seq("CACHE"): 1946 # T-SQL allows empty CACHE which is initialized dynamically 1947 seq.set("cache", self._parse_number() or True) 1948 elif self._match_text_seq("OWNED", "BY"): 1949 # "OWNED BY NONE" is the default 1950 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1951 else: 1952 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1953 if opt: 1954 options.append(opt) 1955 else: 1956 break 1957 1958 seq.set("options", options if options else None) 1959 return None if self._index == index else seq 1960 1961 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1962 # only used for teradata currently 1963 self._match(TokenType.COMMA) 1964 1965 kwargs = { 1966 "no": self._match_text_seq("NO"), 1967 "dual": self._match_text_seq("DUAL"), 1968 "before": self._match_text_seq("BEFORE"), 1969 "default": self._match_text_seq("DEFAULT"), 1970 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1971 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1972 "after": self._match_text_seq("AFTER"), 1973 "minimum": self._match_texts(("MIN", "MINIMUM")), 1974 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1975 } 1976 1977 if self._match_texts(self.PROPERTY_PARSERS): 1978 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1979 try: 1980 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1981 except TypeError: 1982 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1983 1984 return None 1985 1986 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1987 return self._parse_wrapped_csv(self._parse_property) 1988 1989 def _parse_property(self) -> t.Optional[exp.Expression]: 1990 if self._match_texts(self.PROPERTY_PARSERS): 1991 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1992 1993 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1994 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1995 1996 if self._match_text_seq("COMPOUND", "SORTKEY"): 1997 return self._parse_sortkey(compound=True) 1998 1999 if self._match_text_seq("SQL", "SECURITY"): 2000 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2001 2002 index = self._index 2003 key = self._parse_column() 2004 2005 if not self._match(TokenType.EQ): 2006 self._retreat(index) 2007 return self._parse_sequence_properties() 2008 2009 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2010 if isinstance(key, exp.Column): 2011 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2012 2013 value = self._parse_bitwise() or self._parse_var(any_token=True) 2014 2015 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2016 if 
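# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_sequence_properties consumes INCREMENT BY / MINVALUE / START WITH /
# CACHE / OWNED BY clauses into one exp.SequenceProperties node. A hedged
# probe via the public API; output shape may differ between versions.
import sqlglot
from sqlglot import exp

seq = sqlglot.parse_one("CREATE SEQUENCE s START WITH 10 INCREMENT BY 5 MINVALUE 1")
props = seq.find(exp.SequenceProperties)
print(props and props.args.get("start"))      # expected: the literal 10
print(props and props.args.get("increment"))  # expected: the literal 5
# --- end sketch ---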
isinstance(value, exp.Column): 2017 value = exp.var(value.name) 2018 2019 return self.expression(exp.Property, this=key, value=value) 2020 2021 def _parse_stored(self) -> exp.FileFormatProperty: 2022 self._match(TokenType.ALIAS) 2023 2024 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2025 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2026 2027 return self.expression( 2028 exp.FileFormatProperty, 2029 this=( 2030 self.expression( 2031 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2032 ) 2033 if input_format or output_format 2034 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2035 ), 2036 ) 2037 2038 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2039 field = self._parse_field() 2040 if isinstance(field, exp.Identifier) and not field.quoted: 2041 field = exp.var(field) 2042 2043 return field 2044 2045 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2046 self._match(TokenType.EQ) 2047 self._match(TokenType.ALIAS) 2048 2049 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2050 2051 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2052 properties = [] 2053 while True: 2054 if before: 2055 prop = self._parse_property_before() 2056 else: 2057 prop = self._parse_property() 2058 if not prop: 2059 break 2060 for p in ensure_list(prop): 2061 properties.append(p) 2062 2063 if properties: 2064 return self.expression(exp.Properties, expressions=properties) 2065 2066 return None 2067 2068 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2069 return self.expression( 2070 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2071 ) 2072 2073 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2074 if self._match_texts(("DEFINER", "INVOKER")): 2075 security_specifier = self._prev.text.upper() 2076 return self.expression(exp.SecurityProperty, this=security_specifier) 2077 return None 2078 2079 def _parse_settings_property(self) -> exp.SettingsProperty: 2080 return self.expression( 2081 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2082 ) 2083 2084 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2085 if self._index >= 2: 2086 pre_volatile_token = self._tokens[self._index - 2] 2087 else: 2088 pre_volatile_token = None 2089 2090 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2091 return exp.VolatileProperty() 2092 2093 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2094 2095 def _parse_retention_period(self) -> exp.Var: 2096 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2097 number = self._parse_number() 2098 number_str = f"{number} " if number else "" 2099 unit = self._parse_var(any_token=True) 2100 return exp.var(f"{number_str}{unit}") 2101 2102 def _parse_system_versioning_property( 2103 self, with_: bool = False 2104 ) -> exp.WithSystemVersioningProperty: 2105 self._match(TokenType.EQ) 2106 prop = self.expression( 2107 exp.WithSystemVersioningProperty, 2108 **{ # type: ignore 2109 "on": True, 2110 "with": with_, 2111 }, 2112 ) 2113 2114 if self._match_text_seq("OFF"): 2115 prop.set("on", False) 2116 return prop 2117 2118 self._match(TokenType.ON) 2119 if self._match(TokenType.L_PAREN): 2120 while self._curr and not 
self._match(TokenType.R_PAREN): 2121 if self._match_text_seq("HISTORY_TABLE", "="): 2122 prop.set("this", self._parse_table_parts()) 2123 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2124 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2125 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2126 prop.set("retention_period", self._parse_retention_period()) 2127 2128 self._match(TokenType.COMMA) 2129 2130 return prop 2131 2132 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2133 self._match(TokenType.EQ) 2134 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2135 prop = self.expression(exp.DataDeletionProperty, on=on) 2136 2137 if self._match(TokenType.L_PAREN): 2138 while self._curr and not self._match(TokenType.R_PAREN): 2139 if self._match_text_seq("FILTER_COLUMN", "="): 2140 prop.set("filter_column", self._parse_column()) 2141 elif self._match_text_seq("RETENTION_PERIOD", "="): 2142 prop.set("retention_period", self._parse_retention_period()) 2143 2144 self._match(TokenType.COMMA) 2145 2146 return prop 2147 2148 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2149 kind = "HASH" 2150 expressions: t.Optional[t.List[exp.Expression]] = None 2151 if self._match_text_seq("BY", "HASH"): 2152 expressions = self._parse_wrapped_csv(self._parse_id_var) 2153 elif self._match_text_seq("BY", "RANDOM"): 2154 kind = "RANDOM" 2155 2156 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2157 buckets: t.Optional[exp.Expression] = None 2158 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2159 buckets = self._parse_number() 2160 2161 return self.expression( 2162 exp.DistributedByProperty, 2163 expressions=expressions, 2164 kind=kind, 2165 buckets=buckets, 2166 order=self._parse_order(), 2167 ) 2168 2169 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2170 self._match_text_seq("KEY") 2171 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2172 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2173 2174 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2175 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2176 prop = self._parse_system_versioning_property(with_=True) 2177 self._match_r_paren() 2178 return prop 2179 2180 if self._match(TokenType.L_PAREN, advance=False): 2181 return self._parse_wrapped_properties() 2182 2183 if self._match_text_seq("JOURNAL"): 2184 return self._parse_withjournaltable() 2185 2186 if self._match_texts(self.VIEW_ATTRIBUTES): 2187 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2188 2189 if self._match_text_seq("DATA"): 2190 return self._parse_withdata(no=False) 2191 elif self._match_text_seq("NO", "DATA"): 2192 return self._parse_withdata(no=True) 2193 2194 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2195 return self._parse_serde_properties(with_=True) 2196 2197 if self._match(TokenType.SCHEMA): 2198 return self.expression( 2199 exp.WithSchemaBindingProperty, 2200 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2201 ) 2202 2203 if not self._next: 2204 return None 2205 2206 return self._parse_withisolatedloading() 2207 2208 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2209 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2210 self._match(TokenType.EQ) 2211 2212 user = self._parse_id_var() 2213 self._match(TokenType.PARAMETER) 2214 host = 
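# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_definer recognizes MySQL's DEFINER = user@host clause on CREATE VIEW
# (see the linked MySQL docs above) and stores it as exp.DefinerProperty.
# A hedged probe; the backticked identifiers are illustrative.
import sqlglot
from sqlglot import exp

view = sqlglot.parse_one(
    "CREATE DEFINER=`admin`@`localhost` VIEW v AS SELECT 1", read="mysql"
)
print(view.find(exp.DefinerProperty) is not None)  # expected: True
# --- end sketch ---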
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2215 2216 if not user or not host: 2217 return None 2218 2219 return exp.DefinerProperty(this=f"{user}@{host}") 2220 2221 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2222 self._match(TokenType.TABLE) 2223 self._match(TokenType.EQ) 2224 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2225 2226 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2227 return self.expression(exp.LogProperty, no=no) 2228 2229 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2230 return self.expression(exp.JournalProperty, **kwargs) 2231 2232 def _parse_checksum(self) -> exp.ChecksumProperty: 2233 self._match(TokenType.EQ) 2234 2235 on = None 2236 if self._match(TokenType.ON): 2237 on = True 2238 elif self._match_text_seq("OFF"): 2239 on = False 2240 2241 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2242 2243 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2244 return self.expression( 2245 exp.Cluster, 2246 expressions=( 2247 self._parse_wrapped_csv(self._parse_ordered) 2248 if wrapped 2249 else self._parse_csv(self._parse_ordered) 2250 ), 2251 ) 2252 2253 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2254 self._match_text_seq("BY") 2255 2256 self._match_l_paren() 2257 expressions = self._parse_csv(self._parse_column) 2258 self._match_r_paren() 2259 2260 if self._match_text_seq("SORTED", "BY"): 2261 self._match_l_paren() 2262 sorted_by = self._parse_csv(self._parse_ordered) 2263 self._match_r_paren() 2264 else: 2265 sorted_by = None 2266 2267 self._match(TokenType.INTO) 2268 buckets = self._parse_number() 2269 self._match_text_seq("BUCKETS") 2270 2271 return self.expression( 2272 exp.ClusteredByProperty, 2273 expressions=expressions, 2274 sorted_by=sorted_by, 2275 buckets=buckets, 2276 ) 2277 2278 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2279 if not self._match_text_seq("GRANTS"): 2280 self._retreat(self._index - 1) 2281 return None 2282 2283 return self.expression(exp.CopyGrantsProperty) 2284 2285 def _parse_freespace(self) -> exp.FreespaceProperty: 2286 self._match(TokenType.EQ) 2287 return self.expression( 2288 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2289 ) 2290 2291 def _parse_mergeblockratio( 2292 self, no: bool = False, default: bool = False 2293 ) -> exp.MergeBlockRatioProperty: 2294 if self._match(TokenType.EQ): 2295 return self.expression( 2296 exp.MergeBlockRatioProperty, 2297 this=self._parse_number(), 2298 percent=self._match(TokenType.PERCENT), 2299 ) 2300 2301 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2302 2303 def _parse_datablocksize( 2304 self, 2305 default: t.Optional[bool] = None, 2306 minimum: t.Optional[bool] = None, 2307 maximum: t.Optional[bool] = None, 2308 ) -> exp.DataBlocksizeProperty: 2309 self._match(TokenType.EQ) 2310 size = self._parse_number() 2311 2312 units = None 2313 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2314 units = self._prev.text 2315 2316 return self.expression( 2317 exp.DataBlocksizeProperty, 2318 size=size, 2319 units=units, 2320 default=default, 2321 minimum=minimum, 2322 maximum=maximum, 2323 ) 2324 2325 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2326 self._match(TokenType.EQ) 2327 always = self._match_text_seq("ALWAYS") 2328 manual = self._match_text_seq("MANUAL") 2329 never = 
self._match_text_seq("NEVER") 2330 default = self._match_text_seq("DEFAULT") 2331 2332 autotemp = None 2333 if self._match_text_seq("AUTOTEMP"): 2334 autotemp = self._parse_schema() 2335 2336 return self.expression( 2337 exp.BlockCompressionProperty, 2338 always=always, 2339 manual=manual, 2340 never=never, 2341 default=default, 2342 autotemp=autotemp, 2343 ) 2344 2345 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2346 index = self._index 2347 no = self._match_text_seq("NO") 2348 concurrent = self._match_text_seq("CONCURRENT") 2349 2350 if not self._match_text_seq("ISOLATED", "LOADING"): 2351 self._retreat(index) 2352 return None 2353 2354 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2355 return self.expression( 2356 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2357 ) 2358 2359 def _parse_locking(self) -> exp.LockingProperty: 2360 if self._match(TokenType.TABLE): 2361 kind = "TABLE" 2362 elif self._match(TokenType.VIEW): 2363 kind = "VIEW" 2364 elif self._match(TokenType.ROW): 2365 kind = "ROW" 2366 elif self._match_text_seq("DATABASE"): 2367 kind = "DATABASE" 2368 else: 2369 kind = None 2370 2371 if kind in ("DATABASE", "TABLE", "VIEW"): 2372 this = self._parse_table_parts() 2373 else: 2374 this = None 2375 2376 if self._match(TokenType.FOR): 2377 for_or_in = "FOR" 2378 elif self._match(TokenType.IN): 2379 for_or_in = "IN" 2380 else: 2381 for_or_in = None 2382 2383 if self._match_text_seq("ACCESS"): 2384 lock_type = "ACCESS" 2385 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2386 lock_type = "EXCLUSIVE" 2387 elif self._match_text_seq("SHARE"): 2388 lock_type = "SHARE" 2389 elif self._match_text_seq("READ"): 2390 lock_type = "READ" 2391 elif self._match_text_seq("WRITE"): 2392 lock_type = "WRITE" 2393 elif self._match_text_seq("CHECKSUM"): 2394 lock_type = "CHECKSUM" 2395 else: 2396 lock_type = None 2397 2398 override = self._match_text_seq("OVERRIDE") 2399 2400 return self.expression( 2401 exp.LockingProperty, 2402 this=this, 2403 kind=kind, 2404 for_or_in=for_or_in, 2405 lock_type=lock_type, 2406 override=override, 2407 ) 2408 2409 def _parse_partition_by(self) -> t.List[exp.Expression]: 2410 if self._match(TokenType.PARTITION_BY): 2411 return self._parse_csv(self._parse_assignment) 2412 return [] 2413 2414 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2415 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2416 if self._match_text_seq("MINVALUE"): 2417 return exp.var("MINVALUE") 2418 if self._match_text_seq("MAXVALUE"): 2419 return exp.var("MAXVALUE") 2420 return self._parse_bitwise() 2421 2422 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2423 expression = None 2424 from_expressions = None 2425 to_expressions = None 2426 2427 if self._match(TokenType.IN): 2428 this = self._parse_wrapped_csv(self._parse_bitwise) 2429 elif self._match(TokenType.FROM): 2430 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2431 self._match_text_seq("TO") 2432 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2433 elif self._match_text_seq("WITH", "(", "MODULUS"): 2434 this = self._parse_number() 2435 self._match_text_seq(",", "REMAINDER") 2436 expression = self._parse_number() 2437 self._match_r_paren() 2438 else: 2439 self.raise_error("Failed to parse partition bound spec.") 2440 2441 return self.expression( 2442 exp.PartitionBoundSpec, 2443 this=this, 2444 expression=expression, 2445 
from_expressions=from_expressions, 2446 to_expressions=to_expressions, 2447 ) 2448 2449 # https://www.postgresql.org/docs/current/sql-createtable.html 2450 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2451 if not self._match_text_seq("OF"): 2452 self._retreat(self._index - 1) 2453 return None 2454 2455 this = self._parse_table(schema=True) 2456 2457 if self._match(TokenType.DEFAULT): 2458 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2459 elif self._match_text_seq("FOR", "VALUES"): 2460 expression = self._parse_partition_bound_spec() 2461 else: 2462 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2463 2464 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2465 2466 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2467 self._match(TokenType.EQ) 2468 return self.expression( 2469 exp.PartitionedByProperty, 2470 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2471 ) 2472 2473 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2474 if self._match_text_seq("AND", "STATISTICS"): 2475 statistics = True 2476 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2477 statistics = False 2478 else: 2479 statistics = None 2480 2481 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2482 2483 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2484 if self._match_text_seq("SQL"): 2485 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2486 return None 2487 2488 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2489 if self._match_text_seq("SQL", "DATA"): 2490 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2491 return None 2492 2493 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2494 if self._match_text_seq("PRIMARY", "INDEX"): 2495 return exp.NoPrimaryIndexProperty() 2496 if self._match_text_seq("SQL"): 2497 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2498 return None 2499 2500 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2501 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2502 return exp.OnCommitProperty() 2503 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2504 return exp.OnCommitProperty(delete=True) 2505 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2506 2507 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2508 if self._match_text_seq("SQL", "DATA"): 2509 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2510 return None 2511 2512 def _parse_distkey(self) -> exp.DistKeyProperty: 2513 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2514 2515 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2516 table = self._parse_table(schema=True) 2517 2518 options = [] 2519 while self._match_texts(("INCLUDING", "EXCLUDING")): 2520 this = self._prev.text.upper() 2521 2522 id_var = self._parse_id_var() 2523 if not id_var: 2524 return None 2525 2526 options.append( 2527 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2528 ) 2529 2530 return self.expression(exp.LikeProperty, this=table, expressions=options) 2531 2532 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2533 return self.expression( 2534 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2535 ) 2536 2537 def 
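# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_partitioned_of plus _parse_partition_bound_spec cover Postgres'
# CREATE TABLE ... PARTITION OF ... FOR VALUES forms, with MINVALUE/MAXVALUE
# kept as exp.var sentinels. A hedged probe via the public API:
import sqlglot
from sqlglot import exp

part = sqlglot.parse_one(
    "CREATE TABLE m1 PARTITION OF m FOR VALUES FROM (MINVALUE) TO (100)",
    read="postgres",
)
print(part.find(exp.PartitionedOfProperty) is not None)  # expected: True
print(part.find(exp.PartitionBoundSpec) is not None)     # expected: True
# --- end sketch ---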
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2538 self._match(TokenType.EQ) 2539 return self.expression( 2540 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2541 ) 2542 2543 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2544 self._match_text_seq("WITH", "CONNECTION") 2545 return self.expression( 2546 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2547 ) 2548 2549 def _parse_returns(self) -> exp.ReturnsProperty: 2550 value: t.Optional[exp.Expression] 2551 null = None 2552 is_table = self._match(TokenType.TABLE) 2553 2554 if is_table: 2555 if self._match(TokenType.LT): 2556 value = self.expression( 2557 exp.Schema, 2558 this="TABLE", 2559 expressions=self._parse_csv(self._parse_struct_types), 2560 ) 2561 if not self._match(TokenType.GT): 2562 self.raise_error("Expecting >") 2563 else: 2564 value = self._parse_schema(exp.var("TABLE")) 2565 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2566 null = True 2567 value = None 2568 else: 2569 value = self._parse_types() 2570 2571 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2572 2573 def _parse_describe(self) -> exp.Describe: 2574 kind = self._match_set(self.CREATABLES) and self._prev.text 2575 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2576 if self._match(TokenType.DOT): 2577 style = None 2578 self._retreat(self._index - 2) 2579 this = self._parse_table(schema=True) 2580 properties = self._parse_properties() 2581 expressions = properties.expressions if properties else None 2582 partition = self._parse_partition() 2583 return self.expression( 2584 exp.Describe, 2585 this=this, 2586 style=style, 2587 kind=kind, 2588 expressions=expressions, 2589 partition=partition, 2590 ) 2591 2592 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2593 kind = self._prev.text.upper() 2594 expressions = [] 2595 2596 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2597 if self._match(TokenType.WHEN): 2598 expression = self._parse_disjunction() 2599 self._match(TokenType.THEN) 2600 else: 2601 expression = None 2602 2603 else_ = self._match(TokenType.ELSE) 2604 2605 if not self._match(TokenType.INTO): 2606 return None 2607 2608 return self.expression( 2609 exp.ConditionalInsert, 2610 this=self.expression( 2611 exp.Insert, 2612 this=self._parse_table(schema=True), 2613 expression=self._parse_derived_table_values(), 2614 ), 2615 expression=expression, 2616 else_=else_, 2617 ) 2618 2619 expression = parse_conditional_insert() 2620 while expression is not None: 2621 expressions.append(expression) 2622 expression = parse_conditional_insert() 2623 2624 return self.expression( 2625 exp.MultitableInserts, 2626 kind=kind, 2627 comments=comments, 2628 expressions=expressions, 2629 source=self._parse_table(), 2630 ) 2631 2632 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2633 comments = ensure_list(self._prev_comments) 2634 hint = self._parse_hint() 2635 overwrite = self._match(TokenType.OVERWRITE) 2636 ignore = self._match(TokenType.IGNORE) 2637 local = self._match_text_seq("LOCAL") 2638 alternative = None 2639 is_function = None 2640 2641 if self._match_text_seq("DIRECTORY"): 2642 this: t.Optional[exp.Expression] = self.expression( 2643 exp.Directory, 2644 this=self._parse_var_or_string(), 2645 local=local, 2646 row_format=self._parse_row_format(match_row=True), 2647 ) 2648 else: 2649 
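# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_describe records an optional style keyword (one of DESCRIBE_STYLES,
# e.g. EXTENDED or FORMATTED) alongside the described table. A hedged probe
# using the hive dialect; db.t is an illustrative name.
import sqlglot

desc = sqlglot.parse_one("DESCRIBE FORMATTED db.t", read="hive")
print(type(desc).__name__, desc.args.get("style"))  # expected: Describe FORMATTED
# --- end sketch ---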
if self._match_set((TokenType.FIRST, TokenType.ALL)): 2650 comments += ensure_list(self._prev_comments) 2651 return self._parse_multitable_inserts(comments) 2652 2653 if self._match(TokenType.OR): 2654 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2655 2656 self._match(TokenType.INTO) 2657 comments += ensure_list(self._prev_comments) 2658 self._match(TokenType.TABLE) 2659 is_function = self._match(TokenType.FUNCTION) 2660 2661 this = ( 2662 self._parse_table(schema=True, parse_partition=True) 2663 if not is_function 2664 else self._parse_function() 2665 ) 2666 2667 returning = self._parse_returning() 2668 2669 return self.expression( 2670 exp.Insert, 2671 comments=comments, 2672 hint=hint, 2673 is_function=is_function, 2674 this=this, 2675 stored=self._match_text_seq("STORED") and self._parse_stored(), 2676 by_name=self._match_text_seq("BY", "NAME"), 2677 exists=self._parse_exists(), 2678 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2679 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2680 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2681 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2682 conflict=self._parse_on_conflict(), 2683 returning=returning or self._parse_returning(), 2684 overwrite=overwrite, 2685 alternative=alternative, 2686 ignore=ignore, 2687 source=self._match(TokenType.TABLE) and self._parse_table(), 2688 ) 2689 2690 def _parse_kill(self) -> exp.Kill: 2691 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2692 2693 return self.expression( 2694 exp.Kill, 2695 this=self._parse_primary(), 2696 kind=kind, 2697 ) 2698 2699 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2700 conflict = self._match_text_seq("ON", "CONFLICT") 2701 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2702 2703 if not conflict and not duplicate: 2704 return None 2705 2706 conflict_keys = None 2707 constraint = None 2708 2709 if conflict: 2710 if self._match_text_seq("ON", "CONSTRAINT"): 2711 constraint = self._parse_id_var() 2712 elif self._match(TokenType.L_PAREN): 2713 conflict_keys = self._parse_csv(self._parse_id_var) 2714 self._match_r_paren() 2715 2716 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2717 if self._prev.token_type == TokenType.UPDATE: 2718 self._match(TokenType.SET) 2719 expressions = self._parse_csv(self._parse_equality) 2720 else: 2721 expressions = None 2722 2723 return self.expression( 2724 exp.OnConflict, 2725 duplicate=duplicate, 2726 expressions=expressions, 2727 action=action, 2728 conflict_keys=conflict_keys, 2729 constraint=constraint, 2730 ) 2731 2732 def _parse_returning(self) -> t.Optional[exp.Returning]: 2733 if not self._match(TokenType.RETURNING): 2734 return None 2735 return self.expression( 2736 exp.Returning, 2737 expressions=self._parse_csv(self._parse_expression), 2738 into=self._match(TokenType.INTO) and self._parse_table_part(), 2739 ) 2740 2741 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2742 if not self._match(TokenType.FORMAT): 2743 return None 2744 return self._parse_row_format() 2745 2746 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2747 index = self._index 2748 with_ = with_ or self._match_text_seq("WITH") 2749 2750 if not self._match(TokenType.SERDE_PROPERTIES): 2751 self._retreat(index) 2752 return None 2753 return 
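# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_on_conflict captures both Postgres' ON CONFLICT and MySQL's
# ON DUPLICATE KEY forms; the parsed clause lands in the Insert's "conflict"
# slot. A hedged probe via parse_one:
import sqlglot

ins = sqlglot.parse_one(
    "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO UPDATE SET a = 2",
    read="postgres",
)
conflict = ins.args.get("conflict")
print(type(conflict).__name__ if conflict else None)  # expected: OnConflict
# --- end sketch ---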
self.expression( 2754 exp.SerdeProperties, 2755 **{ # type: ignore 2756 "expressions": self._parse_wrapped_properties(), 2757 "with": with_, 2758 }, 2759 ) 2760 2761 def _parse_row_format( 2762 self, match_row: bool = False 2763 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2764 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2765 return None 2766 2767 if self._match_text_seq("SERDE"): 2768 this = self._parse_string() 2769 2770 serde_properties = self._parse_serde_properties() 2771 2772 return self.expression( 2773 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2774 ) 2775 2776 self._match_text_seq("DELIMITED") 2777 2778 kwargs = {} 2779 2780 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2781 kwargs["fields"] = self._parse_string() 2782 if self._match_text_seq("ESCAPED", "BY"): 2783 kwargs["escaped"] = self._parse_string() 2784 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2785 kwargs["collection_items"] = self._parse_string() 2786 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2787 kwargs["map_keys"] = self._parse_string() 2788 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2789 kwargs["lines"] = self._parse_string() 2790 if self._match_text_seq("NULL", "DEFINED", "AS"): 2791 kwargs["null"] = self._parse_string() 2792 2793 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2794 2795 def _parse_load(self) -> exp.LoadData | exp.Command: 2796 if self._match_text_seq("DATA"): 2797 local = self._match_text_seq("LOCAL") 2798 self._match_text_seq("INPATH") 2799 inpath = self._parse_string() 2800 overwrite = self._match(TokenType.OVERWRITE) 2801 self._match_pair(TokenType.INTO, TokenType.TABLE) 2802 2803 return self.expression( 2804 exp.LoadData, 2805 this=self._parse_table(schema=True), 2806 local=local, 2807 overwrite=overwrite, 2808 inpath=inpath, 2809 partition=self._parse_partition(), 2810 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2811 serde=self._match_text_seq("SERDE") and self._parse_string(), 2812 ) 2813 return self._parse_as_command(self._prev) 2814 2815 def _parse_delete(self) -> exp.Delete: 2816 # This handles MySQL's "Multiple-Table Syntax" 2817 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2818 tables = None 2819 comments = self._prev_comments 2820 if not self._match(TokenType.FROM, advance=False): 2821 tables = self._parse_csv(self._parse_table) or None 2822 2823 returning = self._parse_returning() 2824 2825 return self.expression( 2826 exp.Delete, 2827 comments=comments, 2828 tables=tables, 2829 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2830 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2831 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2832 where=self._parse_where(), 2833 returning=returning or self._parse_returning(), 2834 limit=self._parse_limit(), 2835 ) 2836 2837 def _parse_update(self) -> exp.Update: 2838 comments = self._prev_comments 2839 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2840 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2841 returning = self._parse_returning() 2842 return self.expression( 2843 exp.Update, 2844 comments=comments, 2845 **{ # type: ignore 2846 "this": this, 2847 "expressions": expressions, 2848 "from": self._parse_from(joins=True), 2849 "where": self._parse_where(), 2850 "returning": returning or 
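# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_delete supports MySQL's multiple-table syntax (see the linked docs
# above): targets named before FROM are collected into the "tables" arg.
# A hedged probe; expected output is indicative.
import sqlglot

stmt = sqlglot.parse_one(
    "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x > 0", read="mysql"
)
print(stmt.args.get("tables"))  # expected: the pre-FROM target list, [t1]
# --- end sketch ---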
self._parse_returning(), 2851 "order": self._parse_order(), 2852 "limit": self._parse_limit(), 2853 }, 2854 ) 2855 2856 def _parse_uncache(self) -> exp.Uncache: 2857 if not self._match(TokenType.TABLE): 2858 self.raise_error("Expecting TABLE after UNCACHE") 2859 2860 return self.expression( 2861 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2862 ) 2863 2864 def _parse_cache(self) -> exp.Cache: 2865 lazy = self._match_text_seq("LAZY") 2866 self._match(TokenType.TABLE) 2867 table = self._parse_table(schema=True) 2868 2869 options = [] 2870 if self._match_text_seq("OPTIONS"): 2871 self._match_l_paren() 2872 k = self._parse_string() 2873 self._match(TokenType.EQ) 2874 v = self._parse_string() 2875 options = [k, v] 2876 self._match_r_paren() 2877 2878 self._match(TokenType.ALIAS) 2879 return self.expression( 2880 exp.Cache, 2881 this=table, 2882 lazy=lazy, 2883 options=options, 2884 expression=self._parse_select(nested=True), 2885 ) 2886 2887 def _parse_partition(self) -> t.Optional[exp.Partition]: 2888 if not self._match(TokenType.PARTITION): 2889 return None 2890 2891 return self.expression( 2892 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2893 ) 2894 2895 def _parse_value(self) -> t.Optional[exp.Tuple]: 2896 if self._match(TokenType.L_PAREN): 2897 expressions = self._parse_csv(self._parse_expression) 2898 self._match_r_paren() 2899 return self.expression(exp.Tuple, expressions=expressions) 2900 2901 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2902 expression = self._parse_expression() 2903 if expression: 2904 return self.expression(exp.Tuple, expressions=[expression]) 2905 return None 2906 2907 def _parse_projections(self) -> t.List[exp.Expression]: 2908 return self._parse_expressions() 2909 2910 def _parse_select( 2911 self, 2912 nested: bool = False, 2913 table: bool = False, 2914 parse_subquery_alias: bool = True, 2915 parse_set_operation: bool = True, 2916 ) -> t.Optional[exp.Expression]: 2917 cte = self._parse_with() 2918 2919 if cte: 2920 this = self._parse_statement() 2921 2922 if not this: 2923 self.raise_error("Failed to parse any statement following CTE") 2924 return cte 2925 2926 if "with" in this.arg_types: 2927 this.set("with", cte) 2928 else: 2929 self.raise_error(f"{this.key} does not support CTE") 2930 this = cte 2931 2932 return this 2933 2934 # duckdb supports leading with FROM x 2935 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2936 2937 if self._match(TokenType.SELECT): 2938 comments = self._prev_comments 2939 2940 hint = self._parse_hint() 2941 2942 if self._next and not self._next.token_type == TokenType.DOT: 2943 all_ = self._match(TokenType.ALL) 2944 distinct = self._match_set(self.DISTINCT_TOKENS) 2945 else: 2946 all_, distinct = None, None 2947 2948 kind = ( 2949 self._match(TokenType.ALIAS) 2950 and self._match_texts(("STRUCT", "VALUE")) 2951 and self._prev.text.upper() 2952 ) 2953 2954 if distinct: 2955 distinct = self.expression( 2956 exp.Distinct, 2957 on=self._parse_value() if self._match(TokenType.ON) else None, 2958 ) 2959 2960 if all_ and distinct: 2961 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2962 2963 operation_modifiers = [] 2964 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2965 operation_modifiers.append(exp.var(self._prev.text.upper())) 2966 2967 limit = self._parse_limit(top=True) 2968 projections = self._parse_projections() 2969 2970 this = self.expression( 2971 
exp.Select, 2972 kind=kind, 2973 hint=hint, 2974 distinct=distinct, 2975 expressions=projections, 2976 limit=limit, 2977 operation_modifiers=operation_modifiers or None, 2978 ) 2979 this.comments = comments 2980 2981 into = self._parse_into() 2982 if into: 2983 this.set("into", into) 2984 2985 if not from_: 2986 from_ = self._parse_from() 2987 2988 if from_: 2989 this.set("from", from_) 2990 2991 this = self._parse_query_modifiers(this) 2992 elif (table or nested) and self._match(TokenType.L_PAREN): 2993 if self._match(TokenType.PIVOT): 2994 this = self._parse_simplified_pivot() 2995 elif self._match(TokenType.FROM): 2996 this = exp.select("*").from_( 2997 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2998 ) 2999 else: 3000 this = ( 3001 self._parse_table() 3002 if table 3003 else self._parse_select(nested=True, parse_set_operation=False) 3004 ) 3005 3006 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3007 # in case a modifier (e.g. join) is following 3008 if table and isinstance(this, exp.Values) and this.alias: 3009 alias = this.args["alias"].pop() 3010 this = exp.Table(this=this, alias=alias) 3011 3012 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3013 3014 self._match_r_paren() 3015 3016 # We return early here so that the UNION isn't attached to the subquery by the 3017 # following call to _parse_set_operations, but instead becomes the parent node 3018 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3019 elif self._match(TokenType.VALUES, advance=False): 3020 this = self._parse_derived_table_values() 3021 elif from_: 3022 this = exp.select("*").from_(from_.this, copy=False) 3023 elif self._match(TokenType.SUMMARIZE): 3024 table = self._match(TokenType.TABLE) 3025 this = self._parse_select() or self._parse_string() or self._parse_table() 3026 return self.expression(exp.Summarize, this=this, table=table) 3027 elif self._match(TokenType.DESCRIBE): 3028 this = self._parse_describe() 3029 elif self._match_text_seq("STREAM"): 3030 this = self.expression(exp.Stream, this=self._parse_function()) 3031 else: 3032 this = None 3033 3034 return self._parse_set_operations(this) if parse_set_operation else this 3035 3036 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3037 if not skip_with_token and not self._match(TokenType.WITH): 3038 return None 3039 3040 comments = self._prev_comments 3041 recursive = self._match(TokenType.RECURSIVE) 3042 3043 last_comments = None 3044 expressions = [] 3045 while True: 3046 expressions.append(self._parse_cte()) 3047 if last_comments: 3048 expressions[-1].add_comments(last_comments) 3049 3050 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3051 break 3052 else: 3053 self._match(TokenType.WITH) 3054 3055 last_comments = self._prev_comments 3056 3057 return self.expression( 3058 exp.With, comments=comments, expressions=expressions, recursive=recursive 3059 ) 3060 3061 def _parse_cte(self) -> exp.CTE: 3062 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3063 if not alias or not alias.this: 3064 self.raise_error("Expected CTE to have alias") 3065 3066 self._match(TokenType.ALIAS) 3067 comments = self._prev_comments 3068 3069 if self._match_text_seq("NOT", "MATERIALIZED"): 3070 materialized = False 3071 elif self._match_text_seq("MATERIALIZED"): 3072 materialized = True 3073 else: 3074 materialized = None 3075 3076 return self.expression( 3077 exp.CTE, 3078 this=self._parse_wrapped(self._parse_statement), 3079 alias=alias, 3080 
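# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_with/_parse_cte attach a With node (one CTE per comma-separated
# alias) to the statement that follows it. A minimal check via parse_one:
import sqlglot

query = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
with_ = query.args.get("with")
print(type(with_).__name__)                         # expected: With
print([cte.alias for cte in with_.expressions])     # expected: ['x']
# --- end sketch ---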
materialized=materialized, 3081 comments=comments, 3082 ) 3083 3084 def _parse_table_alias( 3085 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3086 ) -> t.Optional[exp.TableAlias]: 3087 any_token = self._match(TokenType.ALIAS) 3088 alias = ( 3089 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3090 or self._parse_string_as_identifier() 3091 ) 3092 3093 index = self._index 3094 if self._match(TokenType.L_PAREN): 3095 columns = self._parse_csv(self._parse_function_parameter) 3096 self._match_r_paren() if columns else self._retreat(index) 3097 else: 3098 columns = None 3099 3100 if not alias and not columns: 3101 return None 3102 3103 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3104 3105 # We bubble up comments from the Identifier to the TableAlias 3106 if isinstance(alias, exp.Identifier): 3107 table_alias.add_comments(alias.pop_comments()) 3108 3109 return table_alias 3110 3111 def _parse_subquery( 3112 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3113 ) -> t.Optional[exp.Subquery]: 3114 if not this: 3115 return None 3116 3117 return self.expression( 3118 exp.Subquery, 3119 this=this, 3120 pivots=self._parse_pivots(), 3121 alias=self._parse_table_alias() if parse_alias else None, 3122 sample=self._parse_table_sample(), 3123 ) 3124 3125 def _implicit_unnests_to_explicit(self, this: E) -> E: 3126 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3127 3128 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3129 for i, join in enumerate(this.args.get("joins") or []): 3130 table = join.this 3131 normalized_table = table.copy() 3132 normalized_table.meta["maybe_column"] = True 3133 normalized_table = _norm(normalized_table, dialect=self.dialect) 3134 3135 if isinstance(table, exp.Table) and not join.args.get("on"): 3136 if normalized_table.parts[0].name in refs: 3137 table_as_column = table.to_column() 3138 unnest = exp.Unnest(expressions=[table_as_column]) 3139 3140 # Table.to_column creates a parent Alias node that we want to convert to 3141 # a TableAlias and attach to the Unnest, so it matches the parser's output 3142 if isinstance(table.args.get("alias"), exp.TableAlias): 3143 table_as_column.replace(table_as_column.this) 3144 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3145 3146 table.replace(unnest) 3147 3148 refs.add(normalized_table.alias_or_name) 3149 3150 return this 3151 3152 def _parse_query_modifiers( 3153 self, this: t.Optional[exp.Expression] 3154 ) -> t.Optional[exp.Expression]: 3155 if isinstance(this, (exp.Query, exp.Table)): 3156 for join in self._parse_joins(): 3157 this.append("joins", join) 3158 for lateral in iter(self._parse_lateral, None): 3159 this.append("laterals", lateral) 3160 3161 while True: 3162 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3163 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3164 key, expression = parser(self) 3165 3166 if expression: 3167 this.set(key, expression) 3168 if key == "limit": 3169 offset = expression.args.pop("offset", None) 3170 3171 if offset: 3172 offset = exp.Offset(expression=offset) 3173 this.set("offset", offset) 3174 3175 limit_by_expressions = expression.expressions 3176 expression.set("expressions", None) 3177 offset.set("expressions", limit_by_expressions) 3178 continue 3179 break 3180 3181 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3182 this = 
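# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_table_alias also accepts a column list after the alias name,
# which ends up on exp.TableAlias.columns. A hedged probe on a derived
# VALUES table; the exact wrapping node may vary between versions.
import sqlglot

sq = sqlglot.parse_one("SELECT * FROM (VALUES (1, 2)) AS v(a, b)")
alias = sq.args["from"].this.args.get("alias")
print(type(alias).__name__ if alias else None)  # expected: TableAlias
print(alias.columns if alias else None)         # expected: identifiers a, b
# --- end sketch ---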
self._implicit_unnests_to_explicit(this) 3183 3184 return this 3185 3186 def _parse_hint(self) -> t.Optional[exp.Hint]: 3187 if self._match(TokenType.HINT): 3188 hints = [] 3189 for hint in iter( 3190 lambda: self._parse_csv( 3191 lambda: self._parse_function() or self._parse_var(upper=True) 3192 ), 3193 [], 3194 ): 3195 hints.extend(hint) 3196 3197 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3198 self.raise_error("Expected */ after HINT") 3199 3200 return self.expression(exp.Hint, expressions=hints) 3201 3202 return None 3203 3204 def _parse_into(self) -> t.Optional[exp.Into]: 3205 if not self._match(TokenType.INTO): 3206 return None 3207 3208 temp = self._match(TokenType.TEMPORARY) 3209 unlogged = self._match_text_seq("UNLOGGED") 3210 self._match(TokenType.TABLE) 3211 3212 return self.expression( 3213 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3214 ) 3215 3216 def _parse_from( 3217 self, joins: bool = False, skip_from_token: bool = False 3218 ) -> t.Optional[exp.From]: 3219 if not skip_from_token and not self._match(TokenType.FROM): 3220 return None 3221 3222 return self.expression( 3223 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3224 ) 3225 3226 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3227 return self.expression( 3228 exp.MatchRecognizeMeasure, 3229 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3230 this=self._parse_expression(), 3231 ) 3232 3233 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3234 if not self._match(TokenType.MATCH_RECOGNIZE): 3235 return None 3236 3237 self._match_l_paren() 3238 3239 partition = self._parse_partition_by() 3240 order = self._parse_order() 3241 3242 measures = ( 3243 self._parse_csv(self._parse_match_recognize_measure) 3244 if self._match_text_seq("MEASURES") 3245 else None 3246 ) 3247 3248 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3249 rows = exp.var("ONE ROW PER MATCH") 3250 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3251 text = "ALL ROWS PER MATCH" 3252 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3253 text += " SHOW EMPTY MATCHES" 3254 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3255 text += " OMIT EMPTY MATCHES" 3256 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3257 text += " WITH UNMATCHED ROWS" 3258 rows = exp.var(text) 3259 else: 3260 rows = None 3261 3262 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3263 text = "AFTER MATCH SKIP" 3264 if self._match_text_seq("PAST", "LAST", "ROW"): 3265 text += " PAST LAST ROW" 3266 elif self._match_text_seq("TO", "NEXT", "ROW"): 3267 text += " TO NEXT ROW" 3268 elif self._match_text_seq("TO", "FIRST"): 3269 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3270 elif self._match_text_seq("TO", "LAST"): 3271 text += f" TO LAST {self._advance_any().text}" # type: ignore 3272 after = exp.var(text) 3273 else: 3274 after = None 3275 3276 if self._match_text_seq("PATTERN"): 3277 self._match_l_paren() 3278 3279 if not self._curr: 3280 self.raise_error("Expecting )", self._curr) 3281 3282 paren = 1 3283 start = self._curr 3284 3285 while self._curr and paren > 0: 3286 if self._curr.token_type == TokenType.L_PAREN: 3287 paren += 1 3288 if self._curr.token_type == TokenType.R_PAREN: 3289 paren -= 1 3290 3291 end = self._prev 3292 self._advance() 3293 3294 if paren > 0: 3295 self.raise_error("Expecting )", self._curr) 3296 3297 pattern = exp.var(self._find_sql(start, end)) 3298 
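# --- Editor's sketch (illustrative; not part of sqlglot.parser) ---
# _parse_hint consumes optimizer hints between /*+ and */ into an exp.Hint
# on the Select. A hedged probe using the spark dialect, whose tokenizer
# emits the HINT token for this comment form:
import sqlglot

hinted = sqlglot.parse_one("SELECT /*+ BROADCAST(t) */ * FROM t", read="spark")
print(hinted.args.get("hint"))  # expected: an exp.Hint wrapping BROADCAST(t)
# --- end sketch ---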
else: 3299 pattern = None 3300 3301 define = ( 3302 self._parse_csv(self._parse_name_as_expression) 3303 if self._match_text_seq("DEFINE") 3304 else None 3305 ) 3306 3307 self._match_r_paren() 3308 3309 return self.expression( 3310 exp.MatchRecognize, 3311 partition_by=partition, 3312 order=order, 3313 measures=measures, 3314 rows=rows, 3315 after=after, 3316 pattern=pattern, 3317 define=define, 3318 alias=self._parse_table_alias(), 3319 ) 3320 3321 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3322 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3323 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3324 cross_apply = False 3325 3326 if cross_apply is not None: 3327 this = self._parse_select(table=True) 3328 view = None 3329 outer = None 3330 elif self._match(TokenType.LATERAL): 3331 this = self._parse_select(table=True) 3332 view = self._match(TokenType.VIEW) 3333 outer = self._match(TokenType.OUTER) 3334 else: 3335 return None 3336 3337 if not this: 3338 this = ( 3339 self._parse_unnest() 3340 or self._parse_function() 3341 or self._parse_id_var(any_token=False) 3342 ) 3343 3344 while self._match(TokenType.DOT): 3345 this = exp.Dot( 3346 this=this, 3347 expression=self._parse_function() or self._parse_id_var(any_token=False), 3348 ) 3349 3350 if view: 3351 table = self._parse_id_var(any_token=False) 3352 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3353 table_alias: t.Optional[exp.TableAlias] = self.expression( 3354 exp.TableAlias, this=table, columns=columns 3355 ) 3356 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3357 # We move the alias from the lateral's child node to the lateral itself 3358 table_alias = this.args["alias"].pop() 3359 else: 3360 table_alias = self._parse_table_alias() 3361 3362 return self.expression( 3363 exp.Lateral, 3364 this=this, 3365 view=view, 3366 outer=outer, 3367 alias=table_alias, 3368 cross_apply=cross_apply, 3369 ) 3370 3371 def _parse_join_parts( 3372 self, 3373 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3374 return ( 3375 self._match_set(self.JOIN_METHODS) and self._prev, 3376 self._match_set(self.JOIN_SIDES) and self._prev, 3377 self._match_set(self.JOIN_KINDS) and self._prev, 3378 ) 3379 3380 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3381 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3382 this = self._parse_column() 3383 if isinstance(this, exp.Column): 3384 return this.this 3385 return this 3386 3387 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3388 3389 def _parse_join( 3390 self, skip_join_token: bool = False, parse_bracket: bool = False 3391 ) -> t.Optional[exp.Join]: 3392 if self._match(TokenType.COMMA): 3393 return self.expression(exp.Join, this=self._parse_table()) 3394 3395 index = self._index 3396 method, side, kind = self._parse_join_parts() 3397 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3398 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3399 3400 if not skip_join_token and not join: 3401 self._retreat(index) 3402 kind = None 3403 method = None 3404 side = None 3405 3406 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3407 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3408 3409 if not skip_join_token and not join and not outer_apply and not cross_apply: 3410 return None 3411 3412 kwargs: t.Dict[str, t.Any] = 
{"this": self._parse_table(parse_bracket=parse_bracket)} 3413 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3414 kwargs["expressions"] = self._parse_csv( 3415 lambda: self._parse_table(parse_bracket=parse_bracket) 3416 ) 3417 3418 if method: 3419 kwargs["method"] = method.text 3420 if side: 3421 kwargs["side"] = side.text 3422 if kind: 3423 kwargs["kind"] = kind.text 3424 if hint: 3425 kwargs["hint"] = hint 3426 3427 if self._match(TokenType.MATCH_CONDITION): 3428 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3429 3430 if self._match(TokenType.ON): 3431 kwargs["on"] = self._parse_assignment() 3432 elif self._match(TokenType.USING): 3433 kwargs["using"] = self._parse_using_identifiers() 3434 elif ( 3435 not (outer_apply or cross_apply) 3436 and not isinstance(kwargs["this"], exp.Unnest) 3437 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3438 ): 3439 index = self._index 3440 joins: t.Optional[list] = list(self._parse_joins()) 3441 3442 if joins and self._match(TokenType.ON): 3443 kwargs["on"] = self._parse_assignment() 3444 elif joins and self._match(TokenType.USING): 3445 kwargs["using"] = self._parse_using_identifiers() 3446 else: 3447 joins = None 3448 self._retreat(index) 3449 3450 kwargs["this"].set("joins", joins if joins else None) 3451 3452 comments = [c for token in (method, side, kind) if token for c in token.comments] 3453 return self.expression(exp.Join, comments=comments, **kwargs) 3454 3455 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3456 this = self._parse_assignment() 3457 3458 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3459 return this 3460 3461 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3462 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3463 3464 return this 3465 3466 def _parse_index_params(self) -> exp.IndexParameters: 3467 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3468 3469 if self._match(TokenType.L_PAREN, advance=False): 3470 columns = self._parse_wrapped_csv(self._parse_with_operator) 3471 else: 3472 columns = None 3473 3474 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3475 partition_by = self._parse_partition_by() 3476 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3477 tablespace = ( 3478 self._parse_var(any_token=True) 3479 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3480 else None 3481 ) 3482 where = self._parse_where() 3483 3484 on = self._parse_field() if self._match(TokenType.ON) else None 3485 3486 return self.expression( 3487 exp.IndexParameters, 3488 using=using, 3489 columns=columns, 3490 include=include, 3491 partition_by=partition_by, 3492 where=where, 3493 with_storage=with_storage, 3494 tablespace=tablespace, 3495 on=on, 3496 ) 3497 3498 def _parse_index( 3499 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3500 ) -> t.Optional[exp.Index]: 3501 if index or anonymous: 3502 unique = None 3503 primary = None 3504 amp = None 3505 3506 self._match(TokenType.ON) 3507 self._match(TokenType.TABLE) # hive 3508 table = self._parse_table_parts(schema=True) 3509 else: 3510 unique = self._match(TokenType.UNIQUE) 3511 primary = self._match_text_seq("PRIMARY") 3512 amp = self._match_text_seq("AMP") 3513 3514 if not self._match(TokenType.INDEX): 3515 return None 3516 3517 index = self._parse_id_var() 3518 table = None 3519 3520 params = 
self._parse_index_params() 3521 3522 return self.expression( 3523 exp.Index, 3524 this=index, 3525 table=table, 3526 unique=unique, 3527 primary=primary, 3528 amp=amp, 3529 params=params, 3530 ) 3531 3532 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3533 hints: t.List[exp.Expression] = [] 3534 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3535 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3536 hints.append( 3537 self.expression( 3538 exp.WithTableHint, 3539 expressions=self._parse_csv( 3540 lambda: self._parse_function() or self._parse_var(any_token=True) 3541 ), 3542 ) 3543 ) 3544 self._match_r_paren() 3545 else: 3546 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3547 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3548 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3549 3550 self._match_set((TokenType.INDEX, TokenType.KEY)) 3551 if self._match(TokenType.FOR): 3552 hint.set("target", self._advance_any() and self._prev.text.upper()) 3553 3554 hint.set("expressions", self._parse_wrapped_id_vars()) 3555 hints.append(hint) 3556 3557 return hints or None 3558 3559 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3560 return ( 3561 (not schema and self._parse_function(optional_parens=False)) 3562 or self._parse_id_var(any_token=False) 3563 or self._parse_string_as_identifier() 3564 or self._parse_placeholder() 3565 ) 3566 3567 def _parse_table_parts( 3568 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3569 ) -> exp.Table: 3570 catalog = None 3571 db = None 3572 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3573 3574 while self._match(TokenType.DOT): 3575 if catalog: 3576 # This allows nesting the table in arbitrarily many dot expressions if needed 3577 table = self.expression( 3578 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3579 ) 3580 else: 3581 catalog = db 3582 db = table 3583 # "" used for tsql FROM a..b case 3584 table = self._parse_table_part(schema=schema) or "" 3585 3586 if ( 3587 wildcard 3588 and self._is_connected() 3589 and (isinstance(table, exp.Identifier) or not table) 3590 and self._match(TokenType.STAR) 3591 ): 3592 if isinstance(table, exp.Identifier): 3593 table.args["this"] += "*" 3594 else: 3595 table = exp.Identifier(this="*") 3596 3597 # We bubble up comments from the Identifier to the Table 3598 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3599 3600 if is_db_reference: 3601 catalog = db 3602 db = table 3603 table = None 3604 3605 if not table and not is_db_reference: 3606 self.raise_error(f"Expected table name but got {self._curr}") 3607 if not db and is_db_reference: 3608 self.raise_error(f"Expected database name but got {self._curr}") 3609 3610 table = self.expression( 3611 exp.Table, 3612 comments=comments, 3613 this=table, 3614 db=db, 3615 catalog=catalog, 3616 ) 3617 3618 changes = self._parse_changes() 3619 if changes: 3620 table.set("changes", changes) 3621 3622 at_before = self._parse_historical_data() 3623 if at_before: 3624 table.set("when", at_before) 3625 3626 pivots = self._parse_pivots() 3627 if pivots: 3628 table.set("pivots", pivots) 3629 3630 return table 3631 3632 def _parse_table( 3633 self, 3634 schema: bool = False, 3635 joins: bool = False, 3636 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3637 parse_bracket: bool = False, 3638 is_db_reference: bool = False, 3639 
parse_partition: bool = False, 3640 ) -> t.Optional[exp.Expression]: 3641 lateral = self._parse_lateral() 3642 if lateral: 3643 return lateral 3644 3645 unnest = self._parse_unnest() 3646 if unnest: 3647 return unnest 3648 3649 values = self._parse_derived_table_values() 3650 if values: 3651 return values 3652 3653 subquery = self._parse_select(table=True) 3654 if subquery: 3655 if not subquery.args.get("pivots"): 3656 subquery.set("pivots", self._parse_pivots()) 3657 return subquery 3658 3659 bracket = parse_bracket and self._parse_bracket(None) 3660 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3661 3662 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3663 self._parse_table 3664 ) 3665 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3666 3667 only = self._match(TokenType.ONLY) 3668 3669 this = t.cast( 3670 exp.Expression, 3671 bracket 3672 or rows_from 3673 or self._parse_bracket( 3674 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3675 ), 3676 ) 3677 3678 if only: 3679 this.set("only", only) 3680 3681 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3682 self._match_text_seq("*") 3683 3684 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3685 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3686 this.set("partition", self._parse_partition()) 3687 3688 if schema: 3689 return self._parse_schema(this=this) 3690 3691 version = self._parse_version() 3692 3693 if version: 3694 this.set("version", version) 3695 3696 if self.dialect.ALIAS_POST_TABLESAMPLE: 3697 this.set("sample", self._parse_table_sample()) 3698 3699 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3700 if alias: 3701 this.set("alias", alias) 3702 3703 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3704 return self.expression( 3705 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3706 ) 3707 3708 this.set("hints", self._parse_table_hints()) 3709 3710 if not this.args.get("pivots"): 3711 this.set("pivots", self._parse_pivots()) 3712 3713 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3714 this.set("sample", self._parse_table_sample()) 3715 3716 if joins: 3717 for join in self._parse_joins(): 3718 this.append("joins", join) 3719 3720 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3721 this.set("ordinality", True) 3722 this.set("alias", self._parse_table_alias()) 3723 3724 return this 3725 3726 def _parse_version(self) -> t.Optional[exp.Version]: 3727 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3728 this = "TIMESTAMP" 3729 elif self._match(TokenType.VERSION_SNAPSHOT): 3730 this = "VERSION" 3731 else: 3732 return None 3733 3734 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3735 kind = self._prev.text.upper() 3736 start = self._parse_bitwise() 3737 self._match_texts(("TO", "AND")) 3738 end = self._parse_bitwise() 3739 expression: t.Optional[exp.Expression] = self.expression( 3740 exp.Tuple, expressions=[start, end] 3741 ) 3742 elif self._match_text_seq("CONTAINED", "IN"): 3743 kind = "CONTAINED IN" 3744 expression = self.expression( 3745 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3746 ) 3747 elif self._match(TokenType.ALL): 3748 kind = "ALL" 3749 expression = None 3750 else: 3751 self._match_text_seq("AS", "OF") 3752 kind = "AS OF" 3753 expression = self._parse_type() 3754 3755 return 
self.expression(exp.Version, this=this, expression=expression, kind=kind) 3756 3757 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3758 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3759 index = self._index 3760 historical_data = None 3761 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3762 this = self._prev.text.upper() 3763 kind = ( 3764 self._match(TokenType.L_PAREN) 3765 and self._match_texts(self.HISTORICAL_DATA_KIND) 3766 and self._prev.text.upper() 3767 ) 3768 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3769 3770 if expression: 3771 self._match_r_paren() 3772 historical_data = self.expression( 3773 exp.HistoricalData, this=this, kind=kind, expression=expression 3774 ) 3775 else: 3776 self._retreat(index) 3777 3778 return historical_data 3779 3780 def _parse_changes(self) -> t.Optional[exp.Changes]: 3781 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3782 return None 3783 3784 information = self._parse_var(any_token=True) 3785 self._match_r_paren() 3786 3787 return self.expression( 3788 exp.Changes, 3789 information=information, 3790 at_before=self._parse_historical_data(), 3791 end=self._parse_historical_data(), 3792 ) 3793 3794 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3795 if not self._match(TokenType.UNNEST): 3796 return None 3797 3798 expressions = self._parse_wrapped_csv(self._parse_equality) 3799 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3800 3801 alias = self._parse_table_alias() if with_alias else None 3802 3803 if alias: 3804 if self.dialect.UNNEST_COLUMN_ONLY: 3805 if alias.args.get("columns"): 3806 self.raise_error("Unexpected extra column alias in unnest.") 3807 3808 alias.set("columns", [alias.this]) 3809 alias.set("this", None) 3810 3811 columns = alias.args.get("columns") or [] 3812 if offset and len(expressions) < len(columns): 3813 offset = columns.pop() 3814 3815 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3816 self._match(TokenType.ALIAS) 3817 offset = self._parse_id_var( 3818 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3819 ) or exp.to_identifier("offset") 3820 3821 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3822 3823 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3824 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3825 if not is_derived and not ( 3826 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3827 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3828 ): 3829 return None 3830 3831 expressions = self._parse_csv(self._parse_value) 3832 alias = self._parse_table_alias() 3833 3834 if is_derived: 3835 self._match_r_paren() 3836 3837 return self.expression( 3838 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3839 ) 3840 3841 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3842 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3843 as_modifier and self._match_text_seq("USING", "SAMPLE") 3844 ): 3845 return None 3846 3847 bucket_numerator = None 3848 bucket_denominator = None 3849 bucket_field = None 3850 percent = None 3851 size = None 3852 seed = None 3853 3854 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3855 matched_l_paren = self._match(TokenType.L_PAREN) 3856 3857 if self.TABLESAMPLE_CSV: 3858 num = None 3859 expressions = self._parse_csv(self._parse_primary) 3860 else: 
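# [Editor's note: illustrative sketch, not part of the parser source.] This `else`
# branch handles the common single-argument TABLESAMPLE form. Assuming sqlglot's
# public `parse_one` API, a Postgres-style sample that exercises it:
#
#     import sqlglot
#     from sqlglot import exp
#     sample = sqlglot.parse_one(
#         "SELECT * FROM t TABLESAMPLE BERNOULLI (10)", read="postgres"
#     ).find(exp.TableSample)
#     # In dialects where TABLESAMPLE_SIZE_IS_PERCENT is set, the 10 is stored
#     # in the `percent` arg; otherwise it is treated as a row count (`size`).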
3861 expressions = None 3862 num = ( 3863 self._parse_factor() 3864 if self._match(TokenType.NUMBER, advance=False) 3865 else self._parse_primary() or self._parse_placeholder() 3866 ) 3867 3868 if self._match_text_seq("BUCKET"): 3869 bucket_numerator = self._parse_number() 3870 self._match_text_seq("OUT", "OF") 3871 bucket_denominator = self._parse_number() 3872 self._match(TokenType.ON) 3873 bucket_field = self._parse_field() 3874 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3875 percent = num 3876 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3877 size = num 3878 else: 3879 percent = num 3880 3881 if matched_l_paren: 3882 self._match_r_paren() 3883 3884 if self._match(TokenType.L_PAREN): 3885 method = self._parse_var(upper=True) 3886 seed = self._match(TokenType.COMMA) and self._parse_number() 3887 self._match_r_paren() 3888 elif self._match_texts(("SEED", "REPEATABLE")): 3889 seed = self._parse_wrapped(self._parse_number) 3890 3891 if not method and self.DEFAULT_SAMPLING_METHOD: 3892 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3893 3894 return self.expression( 3895 exp.TableSample, 3896 expressions=expressions, 3897 method=method, 3898 bucket_numerator=bucket_numerator, 3899 bucket_denominator=bucket_denominator, 3900 bucket_field=bucket_field, 3901 percent=percent, 3902 size=size, 3903 seed=seed, 3904 ) 3905 3906 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3907 return list(iter(self._parse_pivot, None)) or None 3908 3909 def _parse_joins(self) -> t.Iterator[exp.Join]: 3910 return iter(self._parse_join, None) 3911 3912 # https://duckdb.org/docs/sql/statements/pivot 3913 def _parse_simplified_pivot(self) -> exp.Pivot: 3914 def _parse_on() -> t.Optional[exp.Expression]: 3915 this = self._parse_bitwise() 3916 return self._parse_in(this) if self._match(TokenType.IN) else this 3917 3918 this = self._parse_table() 3919 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3920 using = self._match(TokenType.USING) and self._parse_csv( 3921 lambda: self._parse_alias(self._parse_function()) 3922 ) 3923 group = self._parse_group() 3924 return self.expression( 3925 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3926 ) 3927 3928 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3929 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3930 this = self._parse_select_or_expression() 3931 3932 self._match(TokenType.ALIAS) 3933 alias = self._parse_bitwise() 3934 if alias: 3935 if isinstance(alias, exp.Column) and not alias.db: 3936 alias = alias.this 3937 return self.expression(exp.PivotAlias, this=this, alias=alias) 3938 3939 return this 3940 3941 value = self._parse_column() 3942 3943 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3944 self.raise_error("Expecting IN (") 3945 3946 if self._match(TokenType.ANY): 3947 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3948 else: 3949 exprs = self._parse_csv(_parse_aliased_expression) 3950 3951 self._match_r_paren() 3952 return self.expression(exp.In, this=value, expressions=exprs) 3953 3954 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3955 index = self._index 3956 include_nulls = None 3957 3958 if self._match(TokenType.PIVOT): 3959 unpivot = False 3960 elif self._match(TokenType.UNPIVOT): 3961 unpivot = True 3962 3963 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3964 if self._match_text_seq("INCLUDE", "NULLS"):
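# [Editor's note: illustrative sketch, not part of the parser source.] The
# INCLUDE NULLS / EXCLUDE NULLS flags handled below come from Databricks'
# UNPIVOT syntax. Assuming sqlglot's public `parse_one` API, a query that
# reaches this branch:
#
#     import sqlglot
#     ast = sqlglot.parse_one(
#         "SELECT * FROM t UNPIVOT INCLUDE NULLS (v FOR k IN (a, b))",
#         read="databricks",
#     )
#     # The resulting exp.Pivot node carries unpivot=True and include_nulls=True.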
3965 include_nulls = True 3966 elif self._match_text_seq("EXCLUDE", "NULLS"): 3967 include_nulls = False 3968 else: 3969 return None 3970 3971 expressions = [] 3972 3973 if not self._match(TokenType.L_PAREN): 3974 self._retreat(index) 3975 return None 3976 3977 if unpivot: 3978 expressions = self._parse_csv(self._parse_column) 3979 else: 3980 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3981 3982 if not expressions: 3983 self.raise_error("Failed to parse PIVOT's aggregation list") 3984 3985 if not self._match(TokenType.FOR): 3986 self.raise_error("Expecting FOR") 3987 3988 field = self._parse_pivot_in() 3989 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3990 self._parse_bitwise 3991 ) 3992 3993 self._match_r_paren() 3994 3995 pivot = self.expression( 3996 exp.Pivot, 3997 expressions=expressions, 3998 field=field, 3999 unpivot=unpivot, 4000 include_nulls=include_nulls, 4001 default_on_null=default_on_null, 4002 ) 4003 4004 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4005 pivot.set("alias", self._parse_table_alias()) 4006 4007 if not unpivot: 4008 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4009 4010 columns: t.List[exp.Expression] = [] 4011 for fld in pivot.args["field"].expressions: 4012 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4013 for name in names: 4014 if self.PREFIXED_PIVOT_COLUMNS: 4015 name = f"{name}_{field_name}" if name else field_name 4016 else: 4017 name = f"{field_name}_{name}" if name else field_name 4018 4019 columns.append(exp.to_identifier(name)) 4020 4021 pivot.set("columns", columns) 4022 4023 return pivot 4024 4025 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4026 return [agg.alias for agg in aggregations] 4027 4028 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4029 if not skip_where_token and not self._match(TokenType.PREWHERE): 4030 return None 4031 4032 return self.expression( 4033 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4034 ) 4035 4036 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4037 if not skip_where_token and not self._match(TokenType.WHERE): 4038 return None 4039 4040 return self.expression( 4041 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4042 ) 4043 4044 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4045 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4046 return None 4047 4048 elements: t.Dict[str, t.Any] = defaultdict(list) 4049 4050 if self._match(TokenType.ALL): 4051 elements["all"] = True 4052 elif self._match(TokenType.DISTINCT): 4053 elements["all"] = False 4054 4055 while True: 4056 index = self._index 4057 4058 elements["expressions"].extend( 4059 self._parse_csv( 4060 lambda: None 4061 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4062 else self._parse_assignment() 4063 ) 4064 ) 4065 4066 before_with_index = self._index 4067 with_prefix = self._match(TokenType.WITH) 4068 4069 if self._match(TokenType.ROLLUP): 4070 elements["rollup"].append( 4071 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4072 ) 4073 elif self._match(TokenType.CUBE): 4074 elements["cube"].append( 4075 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4076 ) 4077 elif self._match(TokenType.GROUPING_SETS): 4078 
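# [Editor's note: illustrative sketch, not part of the parser source.] A GROUP BY
# that reaches this GROUPING SETS branch, assuming sqlglot's public `parse_one`:
#
#     import sqlglot
#     ast = sqlglot.parse_one(
#         "SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a, b), (a), ())"
#     )
#     # ast.args["group"] is an exp.Group whose "grouping_sets" list holds the
#     # parsed exp.GroupingSets node.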
elements["grouping_sets"].append( 4079 self.expression( 4080 exp.GroupingSets, 4081 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4082 ) 4083 ) 4084 elif self._match_text_seq("TOTALS"): 4085 elements["totals"] = True # type: ignore 4086 4087 if before_with_index <= self._index <= before_with_index + 1: 4088 self._retreat(before_with_index) 4089 break 4090 4091 if index == self._index: 4092 break 4093 4094 return self.expression(exp.Group, **elements) # type: ignore 4095 4096 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4097 return self.expression( 4098 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4099 ) 4100 4101 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4102 if self._match(TokenType.L_PAREN): 4103 grouping_set = self._parse_csv(self._parse_column) 4104 self._match_r_paren() 4105 return self.expression(exp.Tuple, expressions=grouping_set) 4106 4107 return self._parse_column() 4108 4109 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4110 if not skip_having_token and not self._match(TokenType.HAVING): 4111 return None 4112 return self.expression(exp.Having, this=self._parse_assignment()) 4113 4114 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4115 if not self._match(TokenType.QUALIFY): 4116 return None 4117 return self.expression(exp.Qualify, this=self._parse_assignment()) 4118 4119 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4120 if skip_start_token: 4121 start = None 4122 elif self._match(TokenType.START_WITH): 4123 start = self._parse_assignment() 4124 else: 4125 return None 4126 4127 self._match(TokenType.CONNECT_BY) 4128 nocycle = self._match_text_seq("NOCYCLE") 4129 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4130 exp.Prior, this=self._parse_bitwise() 4131 ) 4132 connect = self._parse_assignment() 4133 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4134 4135 if not start and self._match(TokenType.START_WITH): 4136 start = self._parse_assignment() 4137 4138 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4139 4140 def _parse_name_as_expression(self) -> exp.Alias: 4141 return self.expression( 4142 exp.Alias, 4143 alias=self._parse_id_var(any_token=True), 4144 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4145 ) 4146 4147 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4148 if self._match_text_seq("INTERPOLATE"): 4149 return self._parse_wrapped_csv(self._parse_name_as_expression) 4150 return None 4151 4152 def _parse_order( 4153 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4154 ) -> t.Optional[exp.Expression]: 4155 siblings = None 4156 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4157 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4158 return this 4159 4160 siblings = True 4161 4162 return self.expression( 4163 exp.Order, 4164 this=this, 4165 expressions=self._parse_csv(self._parse_ordered), 4166 siblings=siblings, 4167 ) 4168 4169 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4170 if not self._match(token): 4171 return None 4172 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4173 4174 def _parse_ordered( 4175 self, parse_method: t.Optional[t.Callable] = None 4176 ) -> t.Optional[exp.Ordered]: 4177 this = parse_method() if parse_method else self._parse_assignment() 4178 if 
not this: 4179 return None 4180 4181 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4182 this = exp.var("ALL") 4183 4184 asc = self._match(TokenType.ASC) 4185 desc = self._match(TokenType.DESC) or (asc and False) 4186 4187 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4188 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4189 4190 nulls_first = is_nulls_first or False 4191 explicitly_null_ordered = is_nulls_first or is_nulls_last 4192 4193 if ( 4194 not explicitly_null_ordered 4195 and ( 4196 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4197 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4198 ) 4199 and self.dialect.NULL_ORDERING != "nulls_are_last" 4200 ): 4201 nulls_first = True 4202 4203 if self._match_text_seq("WITH", "FILL"): 4204 with_fill = self.expression( 4205 exp.WithFill, 4206 **{ # type: ignore 4207 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4208 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4209 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4210 "interpolate": self._parse_interpolate(), 4211 }, 4212 ) 4213 else: 4214 with_fill = None 4215 4216 return self.expression( 4217 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4218 ) 4219 4220 def _parse_limit( 4221 self, 4222 this: t.Optional[exp.Expression] = None, 4223 top: bool = False, 4224 skip_limit_token: bool = False, 4225 ) -> t.Optional[exp.Expression]: 4226 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4227 comments = self._prev_comments 4228 if top: 4229 limit_paren = self._match(TokenType.L_PAREN) 4230 expression = self._parse_term() if limit_paren else self._parse_number() 4231 4232 if limit_paren: 4233 self._match_r_paren() 4234 else: 4235 expression = self._parse_term() 4236 4237 if self._match(TokenType.COMMA): 4238 offset = expression 4239 expression = self._parse_term() 4240 else: 4241 offset = None 4242 4243 limit_exp = self.expression( 4244 exp.Limit, 4245 this=this, 4246 expression=expression, 4247 offset=offset, 4248 comments=comments, 4249 expressions=self._parse_limit_by(), 4250 ) 4251 4252 return limit_exp 4253 4254 if self._match(TokenType.FETCH): 4255 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4256 direction = self._prev.text.upper() if direction else "FIRST" 4257 4258 count = self._parse_field(tokens=self.FETCH_TOKENS) 4259 percent = self._match(TokenType.PERCENT) 4260 4261 self._match_set((TokenType.ROW, TokenType.ROWS)) 4262 4263 only = self._match_text_seq("ONLY") 4264 with_ties = self._match_text_seq("WITH", "TIES") 4265 4266 if only and with_ties: 4267 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4268 4269 return self.expression( 4270 exp.Fetch, 4271 direction=direction, 4272 count=count, 4273 percent=percent, 4274 with_ties=with_ties, 4275 ) 4276 4277 return this 4278 4279 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4280 if not self._match(TokenType.OFFSET): 4281 return this 4282 4283 count = self._parse_term() 4284 self._match_set((TokenType.ROW, TokenType.ROWS)) 4285 4286 return self.expression( 4287 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4288 ) 4289 4290 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4291 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4292 4293 def _parse_locks(self) -> t.List[exp.Lock]: 4294 locks = [] 4295 while 
True: 4296 if self._match_text_seq("FOR", "UPDATE"): 4297 update = True 4298 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4299 "LOCK", "IN", "SHARE", "MODE" 4300 ): 4301 update = False 4302 else: 4303 break 4304 4305 expressions = None 4306 if self._match_text_seq("OF"): 4307 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4308 4309 wait: t.Optional[bool | exp.Expression] = None 4310 if self._match_text_seq("NOWAIT"): 4311 wait = True 4312 elif self._match_text_seq("WAIT"): 4313 wait = self._parse_primary() 4314 elif self._match_text_seq("SKIP", "LOCKED"): 4315 wait = False 4316 4317 locks.append( 4318 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4319 ) 4320 4321 return locks 4322 4323 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4324 while this and self._match_set(self.SET_OPERATIONS): 4325 token_type = self._prev.token_type 4326 4327 if token_type == TokenType.UNION: 4328 operation: t.Type[exp.SetOperation] = exp.Union 4329 elif token_type == TokenType.EXCEPT: 4330 operation = exp.Except 4331 else: 4332 operation = exp.Intersect 4333 4334 comments = self._prev.comments 4335 4336 if self._match(TokenType.DISTINCT): 4337 distinct: t.Optional[bool] = True 4338 elif self._match(TokenType.ALL): 4339 distinct = False 4340 else: 4341 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4342 if distinct is None: 4343 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4344 4345 by_name = self._match_text_seq("BY", "NAME") 4346 expression = self._parse_select(nested=True, parse_set_operation=False) 4347 4348 this = self.expression( 4349 operation, 4350 comments=comments, 4351 this=this, 4352 distinct=distinct, 4353 by_name=by_name, 4354 expression=expression, 4355 ) 4356 4357 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4358 expression = this.expression 4359 4360 if expression: 4361 for arg in self.SET_OP_MODIFIERS: 4362 expr = expression.args.get(arg) 4363 if expr: 4364 this.set(arg, expr.pop()) 4365 4366 return this 4367 4368 def _parse_expression(self) -> t.Optional[exp.Expression]: 4369 return self._parse_alias(self._parse_assignment()) 4370 4371 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4372 this = self._parse_disjunction() 4373 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4374 # This allows us to parse <non-identifier token> := <expr> 4375 this = exp.column( 4376 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4377 ) 4378 4379 while self._match_set(self.ASSIGNMENT): 4380 if isinstance(this, exp.Column) and len(this.parts) == 1: 4381 this = this.this 4382 4383 this = self.expression( 4384 self.ASSIGNMENT[self._prev.token_type], 4385 this=this, 4386 comments=self._prev_comments, 4387 expression=self._parse_assignment(), 4388 ) 4389 4390 return this 4391 4392 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4393 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4394 4395 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4396 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4397 4398 def _parse_equality(self) -> t.Optional[exp.Expression]: 4399 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4400 4401 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4402 return self._parse_tokens(self._parse_range, self.COMPARISON) 4403 4404 def _parse_range(self, this: 
t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4405 this = this or self._parse_bitwise() 4406 negate = self._match(TokenType.NOT) 4407 4408 if self._match_set(self.RANGE_PARSERS): 4409 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4410 if not expression: 4411 return this 4412 4413 this = expression 4414 elif self._match(TokenType.ISNULL): 4415 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4416 4417 # Postgres supports ISNULL and NOTNULL for conditions. 4418 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4419 if self._match(TokenType.NOTNULL): 4420 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4421 this = self.expression(exp.Not, this=this) 4422 4423 if negate: 4424 this = self._negate_range(this) 4425 4426 if self._match(TokenType.IS): 4427 this = self._parse_is(this) 4428 4429 return this 4430 4431 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4432 if not this: 4433 return this 4434 4435 return self.expression(exp.Not, this=this) 4436 4437 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4438 index = self._index - 1 4439 negate = self._match(TokenType.NOT) 4440 4441 if self._match_text_seq("DISTINCT", "FROM"): 4442 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4443 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4444 4445 if self._match(TokenType.JSON): 4446 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4447 4448 if self._match_text_seq("WITH"): 4449 _with = True 4450 elif self._match_text_seq("WITHOUT"): 4451 _with = False 4452 else: 4453 _with = None 4454 4455 unique = self._match(TokenType.UNIQUE) 4456 self._match_text_seq("KEYS") 4457 expression: t.Optional[exp.Expression] = self.expression( 4458 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4459 ) 4460 else: 4461 expression = self._parse_primary() or self._parse_null() 4462 if not expression: 4463 self._retreat(index) 4464 return None 4465 4466 this = self.expression(exp.Is, this=this, expression=expression) 4467 return self.expression(exp.Not, this=this) if negate else this 4468 4469 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4470 unnest = self._parse_unnest(with_alias=False) 4471 if unnest: 4472 this = self.expression(exp.In, this=this, unnest=unnest) 4473 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4474 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4475 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4476 4477 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4478 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4479 else: 4480 this = self.expression(exp.In, this=this, expressions=expressions) 4481 4482 if matched_l_paren: 4483 self._match_r_paren(this) 4484 elif not self._match(TokenType.R_BRACKET, expression=this): 4485 self.raise_error("Expecting ]") 4486 else: 4487 this = self.expression(exp.In, this=this, field=self._parse_column()) 4488 4489 return this 4490 4491 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4492 low = self._parse_bitwise() 4493 self._match(TokenType.AND) 4494 high = self._parse_bitwise() 4495 return self.expression(exp.Between, this=this, low=low, high=high) 4496 4497 def _parse_escape(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 4498 if not self._match(TokenType.ESCAPE): 4499 return this 4500 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4501 4502 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4503 index = self._index 4504 4505 if not self._match(TokenType.INTERVAL) and match_interval: 4506 return None 4507 4508 if self._match(TokenType.STRING, advance=False): 4509 this = self._parse_primary() 4510 else: 4511 this = self._parse_term() 4512 4513 if not this or ( 4514 isinstance(this, exp.Column) 4515 and not this.table 4516 and not this.this.quoted 4517 and this.name.upper() == "IS" 4518 ): 4519 self._retreat(index) 4520 return None 4521 4522 unit = self._parse_function() or ( 4523 not self._match(TokenType.ALIAS, advance=False) 4524 and self._parse_var(any_token=True, upper=True) 4525 ) 4526 4527 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4528 # each INTERVAL expression into this canonical form so it's easy to transpile 4529 if this and this.is_number: 4530 this = exp.Literal.string(this.to_py()) 4531 elif this and this.is_string: 4532 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4533 if len(parts) == 1: 4534 if unit: 4535 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4536 self._retreat(self._index - 1) 4537 4538 this = exp.Literal.string(parts[0][0]) 4539 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4540 4541 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4542 unit = self.expression( 4543 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4544 ) 4545 4546 interval = self.expression(exp.Interval, this=this, unit=unit) 4547 4548 index = self._index 4549 self._match(TokenType.PLUS) 4550 4551 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4552 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4553 return self.expression( 4554 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4555 ) 4556 4557 self._retreat(index) 4558 return interval 4559 4560 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4561 this = self._parse_term() 4562 4563 while True: 4564 if self._match_set(self.BITWISE): 4565 this = self.expression( 4566 self.BITWISE[self._prev.token_type], 4567 this=this, 4568 expression=self._parse_term(), 4569 ) 4570 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4571 this = self.expression( 4572 exp.DPipe, 4573 this=this, 4574 expression=self._parse_term(), 4575 safe=not self.dialect.STRICT_STRING_CONCAT, 4576 ) 4577 elif self._match(TokenType.DQMARK): 4578 this = self.expression( 4579 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4580 ) 4581 elif self._match_pair(TokenType.LT, TokenType.LT): 4582 this = self.expression( 4583 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4584 ) 4585 elif self._match_pair(TokenType.GT, TokenType.GT): 4586 this = self.expression( 4587 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4588 ) 4589 else: 4590 break 4591 4592 return this 4593 4594 def _parse_term(self) -> t.Optional[exp.Expression]: 4595 this = self._parse_factor() 4596 4597 while self._match_set(self.TERM): 4598 klass = self.TERM[self._prev.token_type] 4599 comments = self._prev_comments 4600 expression = self._parse_factor() 4601 4602 this = self.expression(klass, this=this, comments=comments, expression=expression) 4603 4604 if isinstance(this, exp.Collate): 4605 expr = this.expression 4606 4607 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4608 # fallback to Identifier / Var 4609 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4610 ident = expr.this 4611 if isinstance(ident, exp.Identifier): 4612 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4613 4614 return this 4615 4616 def _parse_factor(self) -> t.Optional[exp.Expression]: 4617 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4618 this = parse_method() 4619 4620 while self._match_set(self.FACTOR): 4621 klass = self.FACTOR[self._prev.token_type] 4622 comments = self._prev_comments 4623 expression = parse_method() 4624 4625 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4626 self._retreat(self._index - 1) 4627 return this 4628 4629 this = self.expression(klass, this=this, comments=comments, expression=expression) 4630 4631 if isinstance(this, exp.Div): 4632 this.args["typed"] = self.dialect.TYPED_DIVISION 4633 this.args["safe"] = self.dialect.SAFE_DIVISION 4634 4635 return this 4636 4637 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4638 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4639 4640 def _parse_unary(self) -> t.Optional[exp.Expression]: 4641 if self._match_set(self.UNARY_PARSERS): 4642 return self.UNARY_PARSERS[self._prev.token_type](self) 4643 return self._parse_at_time_zone(self._parse_type()) 4644 4645 def _parse_type( 4646 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4647 ) -> t.Optional[exp.Expression]: 4648 interval = parse_interval and self._parse_interval() 4649 if interval: 4650 return interval 4651 4652 index = self._index 4653 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4654 
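# [Editor's note: illustrative sketch, not part of the parser source.] The
# comment below describes BigQuery's inline type constructor. Concretely,
# assuming sqlglot's public `parse_one` API:
#
#     import sqlglot
#     ast = sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery")
#     # The STRUCT<a INT64>(1) constructor is canonicalized to
#     # CAST(STRUCT(1) AS STRUCT<a INT64>), i.e. an exp.Cast node.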
4655 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4656 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4657 if isinstance(data_type, exp.Cast): 4658 # This constructor can contain ops directly after it, for instance struct unnesting: 4659 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4660 return self._parse_column_ops(data_type) 4661 4662 if data_type: 4663 index2 = self._index 4664 this = self._parse_primary() 4665 4666 if isinstance(this, exp.Literal): 4667 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4668 if parser: 4669 return parser(self, this, data_type) 4670 4671 return self.expression(exp.Cast, this=this, to=data_type) 4672 4673 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4674 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4675 # 4676 # If the index difference here is greater than 1, that means the parser itself must have 4677 # consumed additional tokens such as the DECIMAL precision and scale in the above example. 4678 # 4679 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4680 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4681 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4682 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4683 # 4684 # In these cases, we don't really want to return the converted type, but instead retreat 4685 # and try to parse a Column or Identifier in the section below. 4686 if data_type.expressions and index2 - index > 1: 4687 self._retreat(index2) 4688 return self._parse_column_ops(data_type) 4689 4690 self._retreat(index) 4691 4692 if fallback_to_identifier: 4693 return self._parse_id_var() 4694 4695 this = self._parse_column() 4696 return this and self._parse_column_ops(this) 4697 4698 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4699 this = self._parse_type() 4700 if not this: 4701 return None 4702 4703 if isinstance(this, exp.Column) and not this.table: 4704 this = exp.var(this.name.upper()) 4705 4706 return self.expression( 4707 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4708 ) 4709 4710 def _parse_types( 4711 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4712 ) -> t.Optional[exp.Expression]: 4713 index = self._index 4714 4715 this: t.Optional[exp.Expression] = None 4716 prefix = self._match_text_seq("SYSUDTLIB", ".") 4717 4718 if not self._match_set(self.TYPE_TOKENS): 4719 identifier = allow_identifiers and self._parse_id_var( 4720 any_token=False, tokens=(TokenType.VAR,) 4721 ) 4722 if isinstance(identifier, exp.Identifier): 4723 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4724 4725 if len(tokens) != 1: 4726 self.raise_error("Unexpected identifier", self._prev) 4727 4728 if tokens[0].token_type in self.TYPE_TOKENS: 4729 self._prev = tokens[0] 4730 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4731 type_name = identifier.name 4732 4733 while self._match(TokenType.DOT): 4734 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4735 4736 this = exp.DataType.build(type_name, udt=True) 4737 else: 4738 self._retreat(self._index - 1) 4739 return None 4740 else: 4741 return None 4742 4743 type_token = self._prev.token_type 4744 4745 if type_token == TokenType.PSEUDO_TYPE:
4746 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4747 4748 if type_token == TokenType.OBJECT_IDENTIFIER: 4749 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4750 4751 # https://materialize.com/docs/sql/types/map/ 4752 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4753 key_type = self._parse_types( 4754 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4755 ) 4756 if not self._match(TokenType.FARROW): 4757 self._retreat(index) 4758 return None 4759 4760 value_type = self._parse_types( 4761 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4762 ) 4763 if not self._match(TokenType.R_BRACKET): 4764 self._retreat(index) 4765 return None 4766 4767 return exp.DataType( 4768 this=exp.DataType.Type.MAP, 4769 expressions=[key_type, value_type], 4770 nested=True, 4771 prefix=prefix, 4772 ) 4773 4774 nested = type_token in self.NESTED_TYPE_TOKENS 4775 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4776 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4777 expressions = None 4778 maybe_func = False 4779 4780 if self._match(TokenType.L_PAREN): 4781 if is_struct: 4782 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4783 elif nested: 4784 expressions = self._parse_csv( 4785 lambda: self._parse_types( 4786 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4787 ) 4788 ) 4789 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4790 this = expressions[0] 4791 this.set("nullable", True) 4792 self._match_r_paren() 4793 return this 4794 elif type_token in self.ENUM_TYPE_TOKENS: 4795 expressions = self._parse_csv(self._parse_equality) 4796 elif is_aggregate: 4797 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4798 any_token=False, tokens=(TokenType.VAR,) 4799 ) 4800 if not func_or_ident or not self._match(TokenType.COMMA): 4801 return None 4802 expressions = self._parse_csv( 4803 lambda: self._parse_types( 4804 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4805 ) 4806 ) 4807 expressions.insert(0, func_or_ident) 4808 else: 4809 expressions = self._parse_csv(self._parse_type_size) 4810 4811 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4812 if type_token == TokenType.VECTOR and len(expressions) == 2: 4813 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4814 4815 if not expressions or not self._match(TokenType.R_PAREN): 4816 self._retreat(index) 4817 return None 4818 4819 maybe_func = True 4820 4821 values: t.Optional[t.List[exp.Expression]] = None 4822 4823 if nested and self._match(TokenType.LT): 4824 if is_struct: 4825 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4826 else: 4827 expressions = self._parse_csv( 4828 lambda: self._parse_types( 4829 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4830 ) 4831 ) 4832 4833 if not self._match(TokenType.GT): 4834 self.raise_error("Expecting >") 4835 4836 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4837 values = self._parse_csv(self._parse_assignment) 4838 if not values and is_struct: 4839 values = None 4840 self._retreat(self._index - 1) 4841 else: 4842 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4843 4844 if type_token in self.TIMESTAMPS: 4845 if self._match_text_seq("WITH", "TIME", "ZONE"): 4846 maybe_func = False 4847 tz_type = ( 4848 exp.DataType.Type.TIMETZ 4849 if 
type_token in self.TIMES 4850 else exp.DataType.Type.TIMESTAMPTZ 4851 ) 4852 this = exp.DataType(this=tz_type, expressions=expressions) 4853 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4854 maybe_func = False 4855 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4856 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4857 maybe_func = False 4858 elif type_token == TokenType.INTERVAL: 4859 unit = self._parse_var(upper=True) 4860 if unit: 4861 if self._match_text_seq("TO"): 4862 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4863 4864 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4865 else: 4866 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4867 4868 if maybe_func and check_func: 4869 index2 = self._index 4870 peek = self._parse_string() 4871 4872 if not peek: 4873 self._retreat(index) 4874 return None 4875 4876 self._retreat(index2) 4877 4878 if not this: 4879 if self._match_text_seq("UNSIGNED"): 4880 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4881 if not unsigned_type_token: 4882 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4883 4884 type_token = unsigned_type_token or type_token 4885 4886 this = exp.DataType( 4887 this=exp.DataType.Type[type_token.value], 4888 expressions=expressions, 4889 nested=nested, 4890 prefix=prefix, 4891 ) 4892 4893 # Empty arrays/structs are allowed 4894 if values is not None: 4895 cls = exp.Struct if is_struct else exp.Array 4896 this = exp.cast(cls(expressions=values), this, copy=False) 4897 4898 elif expressions: 4899 this.set("expressions", expressions) 4900 4901 # https://materialize.com/docs/sql/types/list/#type-name 4902 while self._match(TokenType.LIST): 4903 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4904 4905 index = self._index 4906 4907 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4908 matched_array = self._match(TokenType.ARRAY) 4909 4910 while self._curr: 4911 datatype_token = self._prev.token_type 4912 matched_l_bracket = self._match(TokenType.L_BRACKET) 4913 if not matched_l_bracket and not matched_array: 4914 break 4915 4916 matched_array = False 4917 values = self._parse_csv(self._parse_assignment) or None 4918 if ( 4919 values 4920 and not schema 4921 and ( 4922 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4923 ) 4924 ): 4925 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 4926 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4927 self._retreat(index) 4928 break 4929 4930 this = exp.DataType( 4931 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4932 ) 4933 self._match(TokenType.R_BRACKET) 4934 4935 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4936 converter = self.TYPE_CONVERTERS.get(this.this) 4937 if converter: 4938 this = converter(t.cast(exp.DataType, this)) 4939 4940 return this 4941 4942 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4943 index = self._index 4944 4945 if ( 4946 self._curr 4947 and self._next 4948 and self._curr.token_type in self.TYPE_TOKENS 4949 and self._next.token_type in self.TYPE_TOKENS 4950 ): 4951 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4952 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4953 this = self._parse_id_var() 4954 else: 4955 this = ( 4956 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4957 or self._parse_id_var() 4958 ) 4959 4960 self._match(TokenType.COLON) 4961 4962 if ( 4963 type_required 4964 and not isinstance(this, exp.DataType) 4965 and not self._match_set(self.TYPE_TOKENS, advance=False) 4966 ): 4967 self._retreat(index) 4968 return self._parse_types() 4969 4970 return self._parse_column_def(this) 4971 4972 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4973 if not self._match_text_seq("AT", "TIME", "ZONE"): 4974 return this 4975 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4976 4977 def _parse_column(self) -> t.Optional[exp.Expression]: 4978 this = self._parse_column_reference() 4979 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4980 4981 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4982 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4983 4984 return column 4985 4986 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4987 this = self._parse_field() 4988 if ( 4989 not this 4990 and self._match(TokenType.VALUES, advance=False) 4991 and self.VALUES_FOLLOWED_BY_PAREN 4992 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4993 ): 4994 this = self._parse_id_var() 4995 4996 if isinstance(this, exp.Identifier): 4997 # We bubble up comments from the Identifier to the Column 4998 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4999 5000 return this 5001 5002 def _parse_colon_as_variant_extract( 5003 self, this: t.Optional[exp.Expression] 5004 ) -> t.Optional[exp.Expression]: 5005 casts = [] 5006 json_path = [] 5007 escape = None 5008 5009 while self._match(TokenType.COLON): 5010 start_index = self._index 5011 5012 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5013 path = self._parse_column_ops( 5014 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5015 ) 5016 5017 # The cast :: operator has a lower precedence than the extraction operator :, so 5018 # we rearrange the AST appropriately to avoid casting the JSON path 5019 while isinstance(path, exp.Cast): 5020 casts.append(path.to) 5021 path = path.this 5022 5023 if casts: 5024 dcolon_offset = next( 5025 i 5026 for i, t in enumerate(self._tokens[start_index:]) 5027 if t.token_type == TokenType.DCOLON 
5028 ) 5029 end_token = self._tokens[start_index + dcolon_offset - 1] 5030 else: 5031 end_token = self._prev 5032 5033 if path: 5034 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5035 # it'll roundtrip to a string literal in GET_PATH 5036 if isinstance(path, exp.Identifier) and path.quoted: 5037 escape = True 5038 5039 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5040 5041 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5042 # Databricks transforms it back to the colon/dot notation 5043 if json_path: 5044 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5045 5046 if json_path_expr: 5047 json_path_expr.set("escape", escape) 5048 5049 this = self.expression( 5050 exp.JSONExtract, 5051 this=this, 5052 expression=json_path_expr, 5053 variant_extract=True, 5054 ) 5055 5056 while casts: 5057 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5058 5059 return this 5060 5061 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5062 return self._parse_types() 5063 5064 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5065 this = self._parse_bracket(this) 5066 5067 while self._match_set(self.COLUMN_OPERATORS): 5068 op_token = self._prev.token_type 5069 op = self.COLUMN_OPERATORS.get(op_token) 5070 5071 if op_token == TokenType.DCOLON: 5072 field = self._parse_dcolon() 5073 if not field: 5074 self.raise_error("Expected type") 5075 elif op and self._curr: 5076 field = self._parse_column_reference() or self._parse_bracket() 5077 else: 5078 field = self._parse_field(any_token=True, anonymous_func=True) 5079 5080 if isinstance(field, exp.Func) and this: 5081 # bigquery allows function calls like x.y.count(...) 5082 # SAFE.SUBSTR(...) 
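# [Editor's note: illustrative example, not part of the parser source.]
# Assuming sqlglot's public `parse_one` API, such a call parses via the
# rewrite below:
#
#     import sqlglot
#     sqlglot.parse_one("SELECT SAFE.SUBSTR(name, 1, 2) FROM t", read="bigquery")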
5083 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5084 this = exp.replace_tree( 5085 this, 5086 lambda n: ( 5087 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5088 if n.table 5089 else n.this 5090 ) 5091 if isinstance(n, exp.Column) 5092 else n, 5093 ) 5094 5095 if op: 5096 this = op(self, this, field) 5097 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5098 this = self.expression( 5099 exp.Column, 5100 comments=this.comments, 5101 this=field, 5102 table=this.this, 5103 db=this.args.get("table"), 5104 catalog=this.args.get("db"), 5105 ) 5106 else: 5107 this = self.expression(exp.Dot, this=this, expression=field) 5108 5109 this = self._parse_bracket(this) 5110 5111 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5112 5113 def _parse_primary(self) -> t.Optional[exp.Expression]: 5114 if self._match_set(self.PRIMARY_PARSERS): 5115 token_type = self._prev.token_type 5116 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5117 5118 if token_type == TokenType.STRING: 5119 expressions = [primary] 5120 while self._match(TokenType.STRING): 5121 expressions.append(exp.Literal.string(self._prev.text)) 5122 5123 if len(expressions) > 1: 5124 return self.expression(exp.Concat, expressions=expressions) 5125 5126 return primary 5127 5128 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5129 return exp.Literal.number(f"0.{self._prev.text}") 5130 5131 if self._match(TokenType.L_PAREN): 5132 comments = self._prev_comments 5133 query = self._parse_select() 5134 5135 if query: 5136 expressions = [query] 5137 else: 5138 expressions = self._parse_expressions() 5139 5140 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5141 5142 if not this and self._match(TokenType.R_PAREN, advance=False): 5143 this = self.expression(exp.Tuple) 5144 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5145 this = self._parse_subquery(this=this, parse_alias=False) 5146 elif isinstance(this, exp.Subquery): 5147 this = self._parse_subquery( 5148 this=self._parse_set_operations(this), parse_alias=False 5149 ) 5150 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5151 this = self.expression(exp.Tuple, expressions=expressions) 5152 else: 5153 this = self.expression(exp.Paren, this=this) 5154 5155 if this: 5156 this.add_comments(comments) 5157 5158 self._match_r_paren(expression=this) 5159 return this 5160 5161 return None 5162 5163 def _parse_field( 5164 self, 5165 any_token: bool = False, 5166 tokens: t.Optional[t.Collection[TokenType]] = None, 5167 anonymous_func: bool = False, 5168 ) -> t.Optional[exp.Expression]: 5169 if anonymous_func: 5170 field = ( 5171 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5172 or self._parse_primary() 5173 ) 5174 else: 5175 field = self._parse_primary() or self._parse_function( 5176 anonymous=anonymous_func, any_token=any_token 5177 ) 5178 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5179 5180 def _parse_function( 5181 self, 5182 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5183 anonymous: bool = False, 5184 optional_parens: bool = True, 5185 any_token: bool = False, 5186 ) -> t.Optional[exp.Expression]: 5187 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5188 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5189 fn_syntax = False 5190 if ( 5191 self._match(TokenType.L_BRACE, advance=False) 5192 and 
self._next 5193 and self._next.text.upper() == "FN" 5194 ): 5195 self._advance(2) 5196 fn_syntax = True 5197 5198 func = self._parse_function_call( 5199 functions=functions, 5200 anonymous=anonymous, 5201 optional_parens=optional_parens, 5202 any_token=any_token, 5203 ) 5204 5205 if fn_syntax: 5206 self._match(TokenType.R_BRACE) 5207 5208 return func 5209 5210 def _parse_function_call( 5211 self, 5212 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5213 anonymous: bool = False, 5214 optional_parens: bool = True, 5215 any_token: bool = False, 5216 ) -> t.Optional[exp.Expression]: 5217 if not self._curr: 5218 return None 5219 5220 comments = self._curr.comments 5221 token_type = self._curr.token_type 5222 this = self._curr.text 5223 upper = this.upper() 5224 5225 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5226 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5227 self._advance() 5228 return self._parse_window(parser(self)) 5229 5230 if not self._next or self._next.token_type != TokenType.L_PAREN: 5231 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5232 self._advance() 5233 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5234 5235 return None 5236 5237 if any_token: 5238 if token_type in self.RESERVED_TOKENS: 5239 return None 5240 elif token_type not in self.FUNC_TOKENS: 5241 return None 5242 5243 self._advance(2) 5244 5245 parser = self.FUNCTION_PARSERS.get(upper) 5246 if parser and not anonymous: 5247 this = parser(self) 5248 else: 5249 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5250 5251 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5252 this = self.expression( 5253 subquery_predicate, comments=comments, this=self._parse_select() 5254 ) 5255 self._match_r_paren() 5256 return this 5257 5258 if functions is None: 5259 functions = self.FUNCTIONS 5260 5261 function = functions.get(upper) 5262 5263 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5264 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5265 5266 if alias: 5267 args = self._kv_to_prop_eq(args) 5268 5269 if function and not anonymous: 5270 if "dialect" in function.__code__.co_varnames: 5271 func = function(args, dialect=self.dialect) 5272 else: 5273 func = function(args) 5274 5275 func = self.validate_expression(func, args) 5276 if not self.dialect.NORMALIZE_FUNCTIONS: 5277 func.meta["name"] = this 5278 5279 this = func 5280 else: 5281 if token_type == TokenType.IDENTIFIER: 5282 this = exp.Identifier(this=this, quoted=True) 5283 this = self.expression(exp.Anonymous, this=this, expressions=args) 5284 5285 if isinstance(this, exp.Expression): 5286 this.add_comments(comments) 5287 5288 self._match_r_paren(this) 5289 return self._parse_window(this) 5290 5291 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5292 return expression 5293 5294 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5295 transformed = [] 5296 5297 for index, e in enumerate(expressions): 5298 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5299 if isinstance(e, exp.Alias): 5300 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5301 5302 if not isinstance(e, exp.PropertyEQ): 5303 e = self.expression( 5304 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5305 ) 5306 5307 if isinstance(e.this, exp.Column): 5308 e.this.replace(e.this.this) 5309 else: 5310 e = self._to_prop_eq(e, index) 5311 5312 
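# [Editor's note: illustrative sketch, not part of the parser source.] The
# branch above rewrites aliased or assigned arguments into exp.PropertyEQ
# before each one is appended below. Assuming sqlglot's public `parse_one`
# API, BigQuery's named struct fields exercise this path:
#
#     import sqlglot
#     ast = sqlglot.parse_one("SELECT STRUCT(1 AS a, 2 AS b)", read="bigquery")
#     # Each `1 AS a` argument is parsed as exp.Alias and normalized to
#     # exp.PropertyEQ here.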
transformed.append(e) 5313 5314 return transformed 5315 5316 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5317 return self._parse_column_def(self._parse_id_var()) 5318 5319 def _parse_user_defined_function( 5320 self, kind: t.Optional[TokenType] = None 5321 ) -> t.Optional[exp.Expression]: 5322 this = self._parse_id_var() 5323 5324 while self._match(TokenType.DOT): 5325 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5326 5327 if not self._match(TokenType.L_PAREN): 5328 return this 5329 5330 expressions = self._parse_csv(self._parse_function_parameter) 5331 self._match_r_paren() 5332 return self.expression( 5333 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5334 ) 5335 5336 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5337 literal = self._parse_primary() 5338 if literal: 5339 return self.expression(exp.Introducer, this=token.text, expression=literal) 5340 5341 return self.expression(exp.Identifier, this=token.text) 5342 5343 def _parse_session_parameter(self) -> exp.SessionParameter: 5344 kind = None 5345 this = self._parse_id_var() or self._parse_primary() 5346 5347 if this and self._match(TokenType.DOT): 5348 kind = this.name 5349 this = self._parse_var() or self._parse_primary() 5350 5351 return self.expression(exp.SessionParameter, this=this, kind=kind) 5352 5353 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5354 return self._parse_id_var() 5355 5356 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5357 index = self._index 5358 5359 if self._match(TokenType.L_PAREN): 5360 expressions = t.cast( 5361 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5362 ) 5363 5364 if not self._match(TokenType.R_PAREN): 5365 self._retreat(index) 5366 else: 5367 expressions = [self._parse_lambda_arg()] 5368 5369 if self._match_set(self.LAMBDAS): 5370 return self.LAMBDAS[self._prev.token_type](self, expressions) 5371 5372 self._retreat(index) 5373 5374 this: t.Optional[exp.Expression] 5375 5376 if self._match(TokenType.DISTINCT): 5377 this = self.expression( 5378 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5379 ) 5380 else: 5381 this = self._parse_select_or_expression(alias=alias) 5382 5383 return self._parse_limit( 5384 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5385 ) 5386 5387 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5388 index = self._index 5389 if not self._match(TokenType.L_PAREN): 5390 return this 5391 5392 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5393 # expr can be of both types 5394 if self._match_set(self.SELECT_START_TOKENS): 5395 self._retreat(index) 5396 return this 5397 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5398 self._match_r_paren() 5399 return self.expression(exp.Schema, this=this, expressions=args) 5400 5401 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5402 return self._parse_column_def(self._parse_field(any_token=True)) 5403 5404 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5405 # column defs are not really columns, they're identifiers 5406 if isinstance(this, exp.Column): 5407 this = this.this 5408 5409 kind = self._parse_types(schema=True) 5410 5411 if self._match_text_seq("FOR", "ORDINALITY"): 5412 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5413 5414 constraints: t.List[exp.Expression] = [] 5415 5416 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5417 ("ALIAS", "MATERIALIZED") 5418 ): 5419 persisted = self._prev.text.upper() == "MATERIALIZED" 5420 constraint_kind = exp.ComputedColumnConstraint( 5421 this=self._parse_assignment(), 5422 persisted=persisted or self._match_text_seq("PERSISTED"), 5423 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5424 ) 5425 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5426 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5427 self._match(TokenType.ALIAS) 5428 constraints.append( 5429 self.expression( 5430 exp.ColumnConstraint, 5431 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5432 ) 5433 ) 5434 5435 while True: 5436 constraint = self._parse_column_constraint() 5437 if not constraint: 5438 break 5439 constraints.append(constraint) 5440 5441 if not kind and not constraints: 5442 return this 5443 5444 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5445 5446 def _parse_auto_increment( 5447 self, 5448 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5449 start = None 5450 increment = None 5451 5452 if self._match(TokenType.L_PAREN, advance=False): 5453 args = self._parse_wrapped_csv(self._parse_bitwise) 5454 start = seq_get(args, 0) 5455 increment = seq_get(args, 1) 5456 elif self._match_text_seq("START"): 5457 start = self._parse_bitwise() 5458 self._match_text_seq("INCREMENT") 5459 increment = self._parse_bitwise() 5460 5461 if start and increment: 5462 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5463 5464 return exp.AutoIncrementColumnConstraint() 5465 5466 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5467 if not self._match_text_seq("REFRESH"): 5468 self._retreat(self._index - 1) 5469 return None 5470 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5471 5472 def _parse_compress(self) -> exp.CompressColumnConstraint: 5473 if self._match(TokenType.L_PAREN, advance=False): 5474 return self.expression( 5475 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5476 ) 5477 5478 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5479 5480 def _parse_generated_as_identity( 5481 self, 5482 ) -> ( 5483 exp.GeneratedAsIdentityColumnConstraint 5484 | exp.ComputedColumnConstraint 5485 | exp.GeneratedAsRowColumnConstraint 5486 ): 5487 if self._match_text_seq("BY", "DEFAULT"): 5488 on_null = 
self._match_pair(TokenType.ON, TokenType.NULL) 5489 this = self.expression( 5490 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5491 ) 5492 else: 5493 self._match_text_seq("ALWAYS") 5494 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5495 5496 self._match(TokenType.ALIAS) 5497 5498 if self._match_text_seq("ROW"): 5499 start = self._match_text_seq("START") 5500 if not start: 5501 self._match(TokenType.END) 5502 hidden = self._match_text_seq("HIDDEN") 5503 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5504 5505 identity = self._match_text_seq("IDENTITY") 5506 5507 if self._match(TokenType.L_PAREN): 5508 if self._match(TokenType.START_WITH): 5509 this.set("start", self._parse_bitwise()) 5510 if self._match_text_seq("INCREMENT", "BY"): 5511 this.set("increment", self._parse_bitwise()) 5512 if self._match_text_seq("MINVALUE"): 5513 this.set("minvalue", self._parse_bitwise()) 5514 if self._match_text_seq("MAXVALUE"): 5515 this.set("maxvalue", self._parse_bitwise()) 5516 5517 if self._match_text_seq("CYCLE"): 5518 this.set("cycle", True) 5519 elif self._match_text_seq("NO", "CYCLE"): 5520 this.set("cycle", False) 5521 5522 if not identity: 5523 this.set("expression", self._parse_range()) 5524 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5525 args = self._parse_csv(self._parse_bitwise) 5526 this.set("start", seq_get(args, 0)) 5527 this.set("increment", seq_get(args, 1)) 5528 5529 self._match_r_paren() 5530 5531 return this 5532 5533 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5534 self._match_text_seq("LENGTH") 5535 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5536 5537 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5538 if self._match_text_seq("NULL"): 5539 return self.expression(exp.NotNullColumnConstraint) 5540 if self._match_text_seq("CASESPECIFIC"): 5541 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5542 if self._match_text_seq("FOR", "REPLICATION"): 5543 return self.expression(exp.NotForReplicationColumnConstraint) 5544 5545 # Unconsume the `NOT` token 5546 self._retreat(self._index - 1) 5547 return None 5548 5549 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5550 if self._match(TokenType.CONSTRAINT): 5551 this = self._parse_id_var() 5552 else: 5553 this = None 5554 5555 if self._match_texts(self.CONSTRAINT_PARSERS): 5556 return self.expression( 5557 exp.ColumnConstraint, 5558 this=this, 5559 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5560 ) 5561 5562 return this 5563 5564 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5565 if not self._match(TokenType.CONSTRAINT): 5566 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5567 5568 return self.expression( 5569 exp.Constraint, 5570 this=self._parse_id_var(), 5571 expressions=self._parse_unnamed_constraints(), 5572 ) 5573 5574 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5575 constraints = [] 5576 while True: 5577 constraint = self._parse_unnamed_constraint() or self._parse_function() 5578 if not constraint: 5579 break 5580 constraints.append(constraint) 5581 5582 return constraints 5583 5584 def _parse_unnamed_constraint( 5585 self, constraints: t.Optional[t.Collection[str]] = None 5586 ) -> t.Optional[exp.Expression]: 5587 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5588 constraints or 
self.CONSTRAINT_PARSERS 5589 ): 5590 return None 5591 5592 constraint = self._prev.text.upper() 5593 if constraint not in self.CONSTRAINT_PARSERS: 5594 self.raise_error(f"No parser found for schema constraint {constraint}.") 5595 5596 return self.CONSTRAINT_PARSERS[constraint](self) 5597 5598 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5599 return self._parse_id_var(any_token=False) 5600 5601 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5602 self._match_text_seq("KEY") 5603 return self.expression( 5604 exp.UniqueColumnConstraint, 5605 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5606 this=self._parse_schema(self._parse_unique_key()), 5607 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5608 on_conflict=self._parse_on_conflict(), 5609 ) 5610 5611 def _parse_key_constraint_options(self) -> t.List[str]: 5612 options = [] 5613 while True: 5614 if not self._curr: 5615 break 5616 5617 if self._match(TokenType.ON): 5618 action = None 5619 on = self._advance_any() and self._prev.text 5620 5621 if self._match_text_seq("NO", "ACTION"): 5622 action = "NO ACTION" 5623 elif self._match_text_seq("CASCADE"): 5624 action = "CASCADE" 5625 elif self._match_text_seq("RESTRICT"): 5626 action = "RESTRICT" 5627 elif self._match_pair(TokenType.SET, TokenType.NULL): 5628 action = "SET NULL" 5629 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5630 action = "SET DEFAULT" 5631 else: 5632 self.raise_error("Invalid key constraint") 5633 5634 options.append(f"ON {on} {action}") 5635 else: 5636 var = self._parse_var_from_options( 5637 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5638 ) 5639 if not var: 5640 break 5641 options.append(var.name) 5642 5643 return options 5644 5645 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5646 if match and not self._match(TokenType.REFERENCES): 5647 return None 5648 5649 expressions = None 5650 this = self._parse_table(schema=True) 5651 options = self._parse_key_constraint_options() 5652 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5653 5654 def _parse_foreign_key(self) -> exp.ForeignKey: 5655 expressions = self._parse_wrapped_id_vars() 5656 reference = self._parse_references() 5657 options = {} 5658 5659 while self._match(TokenType.ON): 5660 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5661 self.raise_error("Expected DELETE or UPDATE") 5662 5663 kind = self._prev.text.lower() 5664 5665 if self._match_text_seq("NO", "ACTION"): 5666 action = "NO ACTION" 5667 elif self._match(TokenType.SET): 5668 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5669 action = "SET " + self._prev.text.upper() 5670 else: 5671 self._advance() 5672 action = self._prev.text.upper() 5673 5674 options[kind] = action 5675 5676 return self.expression( 5677 exp.ForeignKey, 5678 expressions=expressions, 5679 reference=reference, 5680 **options, # type: ignore 5681 ) 5682 5683 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5684 return self._parse_field() 5685 5686 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5687 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5688 self._retreat(self._index - 1) 5689 return None 5690 5691 id_vars = self._parse_wrapped_id_vars() 5692 return self.expression( 5693 exp.PeriodForSystemTimeConstraint, 5694 this=seq_get(id_vars, 0), 5695 expression=seq_get(id_vars, 1), 5696 ) 5697 5698 def _parse_primary_key( 5699 self, wrapped_optional: bool 
= False, in_props: bool = False 5700 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5701 desc = ( 5702 self._match_set((TokenType.ASC, TokenType.DESC)) 5703 and self._prev.token_type == TokenType.DESC 5704 ) 5705 5706 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5707 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5708 5709 expressions = self._parse_wrapped_csv( 5710 self._parse_primary_key_part, optional=wrapped_optional 5711 ) 5712 options = self._parse_key_constraint_options() 5713 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5714 5715 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5716 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5717 5718 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5719 """ 5720 Parses a datetime column in ODBC format. We parse the column into the corresponding 5721 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5722 same as we did for `DATE('yyyy-mm-dd')`. 5723 5724 Reference: 5725 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5726 """ 5727 self._match(TokenType.VAR) 5728 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5729 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5730 if not self._match(TokenType.R_BRACE): 5731 self.raise_error("Expected }") 5732 return expression 5733 5734 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5735 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5736 return this 5737 5738 bracket_kind = self._prev.token_type 5739 if ( 5740 bracket_kind == TokenType.L_BRACE 5741 and self._curr 5742 and self._curr.token_type == TokenType.VAR 5743 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5744 ): 5745 return self._parse_odbc_datetime_literal() 5746 5747 expressions = self._parse_csv( 5748 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5749 ) 5750 5751 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5752 self.raise_error("Expected ]") 5753 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5754 self.raise_error("Expected }") 5755 5756 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5757 if bracket_kind == TokenType.L_BRACE: 5758 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5759 elif not this: 5760 this = build_array_constructor( 5761 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5762 ) 5763 else: 5764 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5765 if constructor_type: 5766 return build_array_constructor( 5767 constructor_type, 5768 args=expressions, 5769 bracket_kind=bracket_kind, 5770 dialect=self.dialect, 5771 ) 5772 5773 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5774 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5775 5776 self._add_comments(this) 5777 return self._parse_bracket(this) 5778 5779 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5780 if self._match(TokenType.COLON): 5781 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5782 return this 5783 5784 def _parse_case(self) -> t.Optional[exp.Expression]: 5785 ifs 
= [] 5786 default = None 5787 5788 comments = self._prev_comments 5789 expression = self._parse_assignment() 5790 5791 while self._match(TokenType.WHEN): 5792 this = self._parse_assignment() 5793 self._match(TokenType.THEN) 5794 then = self._parse_assignment() 5795 ifs.append(self.expression(exp.If, this=this, true=then)) 5796 5797 if self._match(TokenType.ELSE): 5798 default = self._parse_assignment() 5799 5800 if not self._match(TokenType.END): 5801 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5802 default = exp.column("interval") 5803 else: 5804 self.raise_error("Expected END after CASE", self._prev) 5805 5806 return self.expression( 5807 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5808 ) 5809 5810 def _parse_if(self) -> t.Optional[exp.Expression]: 5811 if self._match(TokenType.L_PAREN): 5812 args = self._parse_csv(self._parse_assignment) 5813 this = self.validate_expression(exp.If.from_arg_list(args), args) 5814 self._match_r_paren() 5815 else: 5816 index = self._index - 1 5817 5818 if self.NO_PAREN_IF_COMMANDS and index == 0: 5819 return self._parse_as_command(self._prev) 5820 5821 condition = self._parse_assignment() 5822 5823 if not condition: 5824 self._retreat(index) 5825 return None 5826 5827 self._match(TokenType.THEN) 5828 true = self._parse_assignment() 5829 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5830 self._match(TokenType.END) 5831 this = self.expression(exp.If, this=condition, true=true, false=false) 5832 5833 return this 5834 5835 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5836 if not self._match_text_seq("VALUE", "FOR"): 5837 self._retreat(self._index - 1) 5838 return None 5839 5840 return self.expression( 5841 exp.NextValueFor, 5842 this=self._parse_column(), 5843 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5844 ) 5845 5846 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5847 this = self._parse_function() or self._parse_var_or_string(upper=True) 5848 5849 if self._match(TokenType.FROM): 5850 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5851 5852 if not self._match(TokenType.COMMA): 5853 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5854 5855 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5856 5857 def _parse_gap_fill(self) -> exp.GapFill: 5858 self._match(TokenType.TABLE) 5859 this = self._parse_table() 5860 5861 self._match(TokenType.COMMA) 5862 args = [this, *self._parse_csv(self._parse_lambda)] 5863 5864 gap_fill = exp.GapFill.from_arg_list(args) 5865 return self.validate_expression(gap_fill, args) 5866 5867 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5868 this = self._parse_assignment() 5869 5870 if not self._match(TokenType.ALIAS): 5871 if self._match(TokenType.COMMA): 5872 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5873 5874 self.raise_error("Expected AS after CAST") 5875 5876 fmt = None 5877 to = self._parse_types() 5878 5879 if self._match(TokenType.FORMAT): 5880 fmt_string = self._parse_string() 5881 fmt = self._parse_at_time_zone(fmt_string) 5882 5883 if not to: 5884 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5885 if to.this in exp.DataType.TEMPORAL_TYPES: 5886 this = self.expression( 5887 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5888 this=this, 5889 format=exp.Literal.string( 5890 format_time( 5891 
fmt_string.this if fmt_string else "", 5892 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5893 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5894 ) 5895 ), 5896 safe=safe, 5897 ) 5898 5899 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5900 this.set("zone", fmt.args["zone"]) 5901 return this 5902 elif not to: 5903 self.raise_error("Expected TYPE after CAST") 5904 elif isinstance(to, exp.Identifier): 5905 to = exp.DataType.build(to.name, udt=True) 5906 elif to.this == exp.DataType.Type.CHAR: 5907 if self._match(TokenType.CHARACTER_SET): 5908 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5909 5910 return self.expression( 5911 exp.Cast if strict else exp.TryCast, 5912 this=this, 5913 to=to, 5914 format=fmt, 5915 safe=safe, 5916 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5917 ) 5918 5919 def _parse_string_agg(self) -> exp.Expression: 5920 if self._match(TokenType.DISTINCT): 5921 args: t.List[t.Optional[exp.Expression]] = [ 5922 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5923 ] 5924 if self._match(TokenType.COMMA): 5925 args.extend(self._parse_csv(self._parse_assignment)) 5926 else: 5927 args = self._parse_csv(self._parse_assignment) # type: ignore 5928 5929 index = self._index 5930 if not self._match(TokenType.R_PAREN) and args: 5931 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5932 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5933 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5934 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5935 5936 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5937 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5938 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5939 if not self._match_text_seq("WITHIN", "GROUP"): 5940 self._retreat(index) 5941 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5942 5943 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5944 order = self._parse_order(this=seq_get(args, 0)) 5945 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5946 5947 def _parse_convert( 5948 self, strict: bool, safe: t.Optional[bool] = None 5949 ) -> t.Optional[exp.Expression]: 5950 this = self._parse_bitwise() 5951 5952 if self._match(TokenType.USING): 5953 to: t.Optional[exp.Expression] = self.expression( 5954 exp.CharacterSet, this=self._parse_var() 5955 ) 5956 elif self._match(TokenType.COMMA): 5957 to = self._parse_types() 5958 else: 5959 to = None 5960 5961 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5962 5963 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5964 """ 5965 There are generally two variants of the DECODE function: 5966 5967 - DECODE(bin, charset) 5968 - DECODE(expression, search, result [, search, result] ... [, default]) 5969 5970 The second variant will always be parsed into a CASE expression. Note that NULL 5971 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5972 instead of relying on pattern matching. 
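        For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed
        roughly into:

            CASE
                WHEN x = 1 THEN 'one'
                WHEN x IS NULL THEN 'none'
                ELSE 'other'
            END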
5973 """ 5974 args = self._parse_csv(self._parse_assignment) 5975 5976 if len(args) < 3: 5977 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5978 5979 expression, *expressions = args 5980 if not expression: 5981 return None 5982 5983 ifs = [] 5984 for search, result in zip(expressions[::2], expressions[1::2]): 5985 if not search or not result: 5986 return None 5987 5988 if isinstance(search, exp.Literal): 5989 ifs.append( 5990 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5991 ) 5992 elif isinstance(search, exp.Null): 5993 ifs.append( 5994 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5995 ) 5996 else: 5997 cond = exp.or_( 5998 exp.EQ(this=expression.copy(), expression=search), 5999 exp.and_( 6000 exp.Is(this=expression.copy(), expression=exp.Null()), 6001 exp.Is(this=search.copy(), expression=exp.Null()), 6002 copy=False, 6003 ), 6004 copy=False, 6005 ) 6006 ifs.append(exp.If(this=cond, true=result)) 6007 6008 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6009 6010 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6011 self._match_text_seq("KEY") 6012 key = self._parse_column() 6013 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6014 self._match_text_seq("VALUE") 6015 value = self._parse_bitwise() 6016 6017 if not key and not value: 6018 return None 6019 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6020 6021 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6022 if not this or not self._match_text_seq("FORMAT", "JSON"): 6023 return this 6024 6025 return self.expression(exp.FormatJson, this=this) 6026 6027 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6028 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6029 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6030 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6031 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6032 else: 6033 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6034 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6035 6036 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6037 6038 if not empty and not error and not null: 6039 return None 6040 6041 return self.expression( 6042 exp.OnCondition, 6043 empty=empty, 6044 error=error, 6045 null=null, 6046 ) 6047 6048 def _parse_on_handling( 6049 self, on: str, *values: str 6050 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6051 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6052 for value in values: 6053 if self._match_text_seq(value, "ON", on): 6054 return f"{value} ON {on}" 6055 6056 index = self._index 6057 if self._match(TokenType.DEFAULT): 6058 default_value = self._parse_bitwise() 6059 if self._match_text_seq("ON", on): 6060 return default_value 6061 6062 self._retreat(index) 6063 6064 return None 6065 6066 @t.overload 6067 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6068 6069 @t.overload 6070 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6071 6072 def _parse_json_object(self, agg=False): 6073 star = self._parse_star() 6074 expressions = ( 6075 [star] 6076 if star 6077 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6078 ) 6079 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6080 6081 unique_keys = None 6082 if self._match_text_seq("WITH", "UNIQUE"): 6083 unique_keys = True 6084 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6085 unique_keys = False 6086 6087 self._match_text_seq("KEYS") 6088 6089 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6090 self._parse_type() 6091 ) 6092 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6093 6094 return self.expression( 6095 exp.JSONObjectAgg if agg else exp.JSONObject, 6096 expressions=expressions, 6097 null_handling=null_handling, 6098 unique_keys=unique_keys, 6099 return_type=return_type, 6100 encoding=encoding, 6101 ) 6102 6103 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6104 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6105 if not self._match_text_seq("NESTED"): 6106 this = self._parse_id_var() 6107 kind = self._parse_types(allow_identifiers=False) 6108 nested = None 6109 else: 6110 this = None 6111 kind = None 6112 nested = True 6113 6114 path = self._match_text_seq("PATH") and self._parse_string() 6115 nested_schema = nested and self._parse_json_schema() 6116 6117 return self.expression( 6118 exp.JSONColumnDef, 6119 this=this, 6120 kind=kind, 6121 path=path, 6122 nested_schema=nested_schema, 6123 ) 6124 6125 def _parse_json_schema(self) -> exp.JSONSchema: 6126 self._match_text_seq("COLUMNS") 6127 return self.expression( 6128 exp.JSONSchema, 6129 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6130 ) 6131 6132 def _parse_json_table(self) -> exp.JSONTable: 6133 this = self._parse_format_json(self._parse_bitwise()) 6134 path = self._match(TokenType.COMMA) and self._parse_string() 6135 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6136 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6137 schema = self._parse_json_schema() 6138 6139 return exp.JSONTable( 6140 this=this, 6141 schema=schema, 6142 path=path, 6143 error_handling=error_handling, 6144 empty_handling=empty_handling, 6145 ) 6146 6147 def _parse_match_against(self) -> exp.MatchAgainst: 6148 expressions = self._parse_csv(self._parse_column) 6149 6150 self._match_text_seq(")", "AGAINST", "(") 6151 6152 this = self._parse_string() 6153 6154 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6155 modifier = "IN NATURAL LANGUAGE MODE" 6156 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6157 modifier = f"{modifier} WITH QUERY EXPANSION" 6158 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6159 modifier = "IN BOOLEAN MODE" 6160 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6161 modifier = "WITH QUERY EXPANSION" 6162 else: 6163 modifier = None 6164 6165 return self.expression( 6166 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6167 ) 6168 6169 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6170 def _parse_open_json(self) -> exp.OpenJSON: 6171 this = self._parse_bitwise() 6172 path = self._match(TokenType.COMMA) and self._parse_string() 6173 6174 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6175 this = self._parse_field(any_token=True) 6176 kind = self._parse_types() 6177 path = 
self._parse_string() 6178 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6179 6180 return self.expression( 6181 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6182 ) 6183 6184 expressions = None 6185 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6186 self._match_l_paren() 6187 expressions = self._parse_csv(_parse_open_json_column_def) 6188 6189 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6190 6191 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6192 args = self._parse_csv(self._parse_bitwise) 6193 6194 if self._match(TokenType.IN): 6195 return self.expression( 6196 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6197 ) 6198 6199 if haystack_first: 6200 haystack = seq_get(args, 0) 6201 needle = seq_get(args, 1) 6202 else: 6203 needle = seq_get(args, 0) 6204 haystack = seq_get(args, 1) 6205 6206 return self.expression( 6207 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6208 ) 6209 6210 def _parse_predict(self) -> exp.Predict: 6211 self._match_text_seq("MODEL") 6212 this = self._parse_table() 6213 6214 self._match(TokenType.COMMA) 6215 self._match_text_seq("TABLE") 6216 6217 return self.expression( 6218 exp.Predict, 6219 this=this, 6220 expression=self._parse_table(), 6221 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6222 ) 6223 6224 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6225 args = self._parse_csv(self._parse_table) 6226 return exp.JoinHint(this=func_name.upper(), expressions=args) 6227 6228 def _parse_substring(self) -> exp.Substring: 6229 # Postgres supports the form: substring(string [from int] [for int]) 6230 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6231 6232 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6233 6234 if self._match(TokenType.FROM): 6235 args.append(self._parse_bitwise()) 6236 if self._match(TokenType.FOR): 6237 if len(args) == 1: 6238 args.append(exp.Literal.number(1)) 6239 args.append(self._parse_bitwise()) 6240 6241 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6242 6243 def _parse_trim(self) -> exp.Trim: 6244 # https://www.w3resource.com/sql/character-functions/trim.php 6245 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6246 6247 position = None 6248 collation = None 6249 expression = None 6250 6251 if self._match_texts(self.TRIM_TYPES): 6252 position = self._prev.text.upper() 6253 6254 this = self._parse_bitwise() 6255 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6256 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6257 expression = self._parse_bitwise() 6258 6259 if invert_order: 6260 this, expression = expression, this 6261 6262 if self._match(TokenType.COLLATE): 6263 collation = self._parse_bitwise() 6264 6265 return self.expression( 6266 exp.Trim, this=this, position=position, expression=expression, collation=collation 6267 ) 6268 6269 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6270 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6271 6272 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6273 return self._parse_window(self._parse_id_var(), alias=True) 6274 6275 def _parse_respect_or_ignore_nulls( 6276 self, this: t.Optional[exp.Expression] 6277 ) -> t.Optional[exp.Expression]: 6278 if self._match_text_seq("IGNORE", "NULLS"): 
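            # in-parenthesis form, e.g. FIRST_VALUE(x IGNORE NULLS): the parsed
            # argument is wrapped in exp.IgnoreNulls here, while the
            # outside-parenthesis form is normalized in _parse_window below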
6279 return self.expression(exp.IgnoreNulls, this=this) 6280 if self._match_text_seq("RESPECT", "NULLS"): 6281 return self.expression(exp.RespectNulls, this=this) 6282 return this 6283 6284 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6285 if self._match(TokenType.HAVING): 6286 self._match_texts(("MAX", "MIN")) 6287 max = self._prev.text.upper() != "MIN" 6288 return self.expression( 6289 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6290 ) 6291 6292 return this 6293 6294 def _parse_window( 6295 self, this: t.Optional[exp.Expression], alias: bool = False 6296 ) -> t.Optional[exp.Expression]: 6297 func = this 6298 comments = func.comments if isinstance(func, exp.Expression) else None 6299 6300 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6301 self._match(TokenType.WHERE) 6302 this = self.expression( 6303 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6304 ) 6305 self._match_r_paren() 6306 6307 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6308 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6309 if self._match_text_seq("WITHIN", "GROUP"): 6310 order = self._parse_wrapped(self._parse_order) 6311 this = self.expression(exp.WithinGroup, this=this, expression=order) 6312 6313 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6314 # Some dialects choose to implement and some do not. 6315 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6316 6317 # There is some code above in _parse_lambda that handles 6318 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6319 6320 # The below changes handle 6321 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6322 6323 # Oracle allows both formats 6324 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6325 # and Snowflake chose to do the same for familiarity 6326 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6327 if isinstance(this, exp.AggFunc): 6328 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6329 6330 if ignore_respect and ignore_respect is not this: 6331 ignore_respect.replace(ignore_respect.this) 6332 this = self.expression(ignore_respect.__class__, this=this) 6333 6334 this = self._parse_respect_or_ignore_nulls(this) 6335 6336 # bigquery select from window x AS (partition by ...) 
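        # As a minimal sketch of the two shapes handled here (illustrative SQL):
        #   SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t
        # wraps the parsed call in an exp.Window carrying partition_by and order,
        # while the named form
        #   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY a)
        # takes the no-parenthesis early return below and records the alias w.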
6337 if alias: 6338 over = None 6339 self._match(TokenType.ALIAS) 6340 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6341 return this 6342 else: 6343 over = self._prev.text.upper() 6344 6345 if comments and isinstance(func, exp.Expression): 6346 func.pop_comments() 6347 6348 if not self._match(TokenType.L_PAREN): 6349 return self.expression( 6350 exp.Window, 6351 comments=comments, 6352 this=this, 6353 alias=self._parse_id_var(False), 6354 over=over, 6355 ) 6356 6357 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6358 6359 first = self._match(TokenType.FIRST) 6360 if self._match_text_seq("LAST"): 6361 first = False 6362 6363 partition, order = self._parse_partition_and_order() 6364 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6365 6366 if kind: 6367 self._match(TokenType.BETWEEN) 6368 start = self._parse_window_spec() 6369 self._match(TokenType.AND) 6370 end = self._parse_window_spec() 6371 6372 spec = self.expression( 6373 exp.WindowSpec, 6374 kind=kind, 6375 start=start["value"], 6376 start_side=start["side"], 6377 end=end["value"], 6378 end_side=end["side"], 6379 ) 6380 else: 6381 spec = None 6382 6383 self._match_r_paren() 6384 6385 window = self.expression( 6386 exp.Window, 6387 comments=comments, 6388 this=this, 6389 partition_by=partition, 6390 order=order, 6391 spec=spec, 6392 alias=window_alias, 6393 over=over, 6394 first=first, 6395 ) 6396 6397 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6398 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6399 return self._parse_window(window, alias=alias) 6400 6401 return window 6402 6403 def _parse_partition_and_order( 6404 self, 6405 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6406 return self._parse_partition_by(), self._parse_order() 6407 6408 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6409 self._match(TokenType.BETWEEN) 6410 6411 return { 6412 "value": ( 6413 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6414 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6415 or self._parse_bitwise() 6416 ), 6417 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6418 } 6419 6420 def _parse_alias( 6421 self, this: t.Optional[exp.Expression], explicit: bool = False 6422 ) -> t.Optional[exp.Expression]: 6423 any_token = self._match(TokenType.ALIAS) 6424 comments = self._prev_comments or [] 6425 6426 if explicit and not any_token: 6427 return this 6428 6429 if self._match(TokenType.L_PAREN): 6430 aliases = self.expression( 6431 exp.Aliases, 6432 comments=comments, 6433 this=this, 6434 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6435 ) 6436 self._match_r_paren(aliases) 6437 return aliases 6438 6439 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6440 self.STRING_ALIASES and self._parse_string_as_identifier() 6441 ) 6442 6443 if alias: 6444 comments.extend(alias.pop_comments()) 6445 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6446 column = this.this 6447 6448 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6449 if not this.comments and column and column.comments: 6450 this.comments = column.pop_comments() 6451 6452 return this 6453 6454 def _parse_id_var( 6455 self, 6456 any_token: bool = True, 6457 tokens: t.Optional[t.Collection[TokenType]] = None, 6458 ) -> t.Optional[exp.Expression]: 6459 expression = self._parse_identifier() 6460 if 
not expression and ( 6461 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6462 ): 6463 quoted = self._prev.token_type == TokenType.STRING 6464 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6465 6466 return expression 6467 6468 def _parse_string(self) -> t.Optional[exp.Expression]: 6469 if self._match_set(self.STRING_PARSERS): 6470 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6471 return self._parse_placeholder() 6472 6473 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6474 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6475 6476 def _parse_number(self) -> t.Optional[exp.Expression]: 6477 if self._match_set(self.NUMERIC_PARSERS): 6478 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6479 return self._parse_placeholder() 6480 6481 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6482 if self._match(TokenType.IDENTIFIER): 6483 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6484 return self._parse_placeholder() 6485 6486 def _parse_var( 6487 self, 6488 any_token: bool = False, 6489 tokens: t.Optional[t.Collection[TokenType]] = None, 6490 upper: bool = False, 6491 ) -> t.Optional[exp.Expression]: 6492 if ( 6493 (any_token and self._advance_any()) 6494 or self._match(TokenType.VAR) 6495 or (self._match_set(tokens) if tokens else False) 6496 ): 6497 return self.expression( 6498 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6499 ) 6500 return self._parse_placeholder() 6501 6502 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6503 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6504 self._advance() 6505 return self._prev 6506 return None 6507 6508 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6509 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6510 6511 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6512 return self._parse_primary() or self._parse_var(any_token=True) 6513 6514 def _parse_null(self) -> t.Optional[exp.Expression]: 6515 if self._match_set(self.NULL_TOKENS): 6516 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6517 return self._parse_placeholder() 6518 6519 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6520 if self._match(TokenType.TRUE): 6521 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6522 if self._match(TokenType.FALSE): 6523 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6524 return self._parse_placeholder() 6525 6526 def _parse_star(self) -> t.Optional[exp.Expression]: 6527 if self._match(TokenType.STAR): 6528 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6529 return self._parse_placeholder() 6530 6531 def _parse_parameter(self) -> exp.Parameter: 6532 this = self._parse_identifier() or self._parse_primary_or_var() 6533 return self.expression(exp.Parameter, this=this) 6534 6535 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6536 if self._match_set(self.PLACEHOLDER_PARSERS): 6537 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6538 if placeholder: 6539 return placeholder 6540 self._advance(-1) 6541 return None 6542 6543 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6544 if not self._match_texts(keywords): 6545 return None 6546 if self._match(TokenType.L_PAREN, 
advance=False): 6547 return self._parse_wrapped_csv(self._parse_expression) 6548 6549 expression = self._parse_expression() 6550 return [expression] if expression else None 6551 6552 def _parse_csv( 6553 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6554 ) -> t.List[exp.Expression]: 6555 parse_result = parse_method() 6556 items = [parse_result] if parse_result is not None else [] 6557 6558 while self._match(sep): 6559 self._add_comments(parse_result) 6560 parse_result = parse_method() 6561 if parse_result is not None: 6562 items.append(parse_result) 6563 6564 return items 6565 6566 def _parse_tokens( 6567 self, parse_method: t.Callable, expressions: t.Dict 6568 ) -> t.Optional[exp.Expression]: 6569 this = parse_method() 6570 6571 while self._match_set(expressions): 6572 this = self.expression( 6573 expressions[self._prev.token_type], 6574 this=this, 6575 comments=self._prev_comments, 6576 expression=parse_method(), 6577 ) 6578 6579 return this 6580 6581 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6582 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6583 6584 def _parse_wrapped_csv( 6585 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6586 ) -> t.List[exp.Expression]: 6587 return self._parse_wrapped( 6588 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6589 ) 6590 6591 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6592 wrapped = self._match(TokenType.L_PAREN) 6593 if not wrapped and not optional: 6594 self.raise_error("Expecting (") 6595 parse_result = parse_method() 6596 if wrapped: 6597 self._match_r_paren() 6598 return parse_result 6599 6600 def _parse_expressions(self) -> t.List[exp.Expression]: 6601 return self._parse_csv(self._parse_expression) 6602 6603 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6604 return self._parse_select() or self._parse_set_operations( 6605 self._parse_expression() if alias else self._parse_assignment() 6606 ) 6607 6608 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6609 return self._parse_query_modifiers( 6610 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6611 ) 6612 6613 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6614 this = None 6615 if self._match_texts(self.TRANSACTION_KIND): 6616 this = self._prev.text 6617 6618 self._match_texts(("TRANSACTION", "WORK")) 6619 6620 modes = [] 6621 while True: 6622 mode = [] 6623 while self._match(TokenType.VAR): 6624 mode.append(self._prev.text) 6625 6626 if mode: 6627 modes.append(" ".join(mode)) 6628 if not self._match(TokenType.COMMA): 6629 break 6630 6631 return self.expression(exp.Transaction, this=this, modes=modes) 6632 6633 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6634 chain = None 6635 savepoint = None 6636 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6637 6638 self._match_texts(("TRANSACTION", "WORK")) 6639 6640 if self._match_text_seq("TO"): 6641 self._match_text_seq("SAVEPOINT") 6642 savepoint = self._parse_id_var() 6643 6644 if self._match(TokenType.AND): 6645 chain = not self._match_text_seq("NO") 6646 self._match_text_seq("CHAIN") 6647 6648 if is_rollback: 6649 return self.expression(exp.Rollback, savepoint=savepoint) 6650 6651 return self.expression(exp.Commit, chain=chain) 6652 6653 def _parse_refresh(self) -> exp.Refresh: 6654 self._match(TokenType.TABLE) 6655 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6656 6657 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6658 if not self._match_text_seq("ADD"): 6659 return None 6660 6661 self._match(TokenType.COLUMN) 6662 exists_column = self._parse_exists(not_=True) 6663 expression = self._parse_field_def() 6664 6665 if expression: 6666 expression.set("exists", exists_column) 6667 6668 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6669 if self._match_texts(("FIRST", "AFTER")): 6670 position = self._prev.text 6671 column_position = self.expression( 6672 exp.ColumnPosition, this=self._parse_column(), position=position 6673 ) 6674 expression.set("position", column_position) 6675 6676 return expression 6677 6678 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6679 drop = self._match(TokenType.DROP) and self._parse_drop() 6680 if drop and not isinstance(drop, exp.Command): 6681 drop.set("kind", drop.args.get("kind", "COLUMN")) 6682 return drop 6683 6684 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6685 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6686 return self.expression( 6687 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6688 ) 6689 6690 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6691 index = self._index - 1 6692 6693 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6694 return self._parse_csv( 6695 lambda: self.expression( 6696 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6697 ) 6698 ) 6699 6700 self._retreat(index) 6701 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6702 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6703 6704 if self._match_text_seq("ADD", "COLUMNS"): 6705 schema = self._parse_schema() 6706 if schema: 6707 return [schema] 6708 return [] 6709 6710 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6711 6712 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6713 if self._match_texts(self.ALTER_ALTER_PARSERS): 6714 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6715 6716 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6717 # keyword after ALTER we default to parsing this statement 6718 self._match(TokenType.COLUMN) 6719 column = self._parse_field(any_token=True) 6720 6721 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6722 return self.expression(exp.AlterColumn, this=column, drop=True) 6723 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6724 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6725 if self._match(TokenType.COMMENT): 6726 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6727 if self._match_text_seq("DROP", "NOT", "NULL"): 6728 return self.expression( 6729 exp.AlterColumn, 6730 this=column, 6731 drop=True, 6732 allow_null=True, 6733 ) 6734 if self._match_text_seq("SET", "NOT", "NULL"): 6735 return self.expression( 6736 exp.AlterColumn, 6737 this=column, 6738 allow_null=False, 6739 ) 6740 self._match_text_seq("SET", "DATA") 6741 self._match_text_seq("TYPE") 6742 return self.expression( 6743 exp.AlterColumn, 6744 this=column, 6745 dtype=self._parse_types(), 6746 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6747 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6748 ) 6749 6750 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6751 if self._match_texts(("ALL", "EVEN", "AUTO")): 6752 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6753 6754 self._match_text_seq("KEY", "DISTKEY") 6755 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6756 6757 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6758 if compound: 6759 self._match_text_seq("SORTKEY") 6760 6761 if self._match(TokenType.L_PAREN, advance=False): 6762 return self.expression( 6763 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6764 ) 6765 6766 self._match_texts(("AUTO", "NONE")) 6767 return self.expression( 6768 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6769 ) 6770 6771 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6772 index = self._index - 1 6773 6774 partition_exists = self._parse_exists() 6775 if self._match(TokenType.PARTITION, advance=False): 6776 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6777 6778 self._retreat(index) 6779 return self._parse_csv(self._parse_drop_column) 6780 6781 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6782 if self._match(TokenType.COLUMN): 6783 exists = self._parse_exists() 6784 old_column = self._parse_column() 6785 to = self._match_text_seq("TO") 6786 new_column = self._parse_column() 6787 6788 if old_column is None or to is None or new_column is None: 6789 return None 6790 6791 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6792 6793 self._match_text_seq("TO") 6794 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6795 6796 def _parse_alter_table_set(self) -> exp.AlterSet: 6797 alter_set = self.expression(exp.AlterSet) 6798 6799 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6800 "TABLE", "PROPERTIES" 6801 ): 6802 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6803 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6804 alter_set.set("expressions", [self._parse_assignment()]) 6805 elif self._match_texts(("LOGGED", "UNLOGGED")): 6806 alter_set.set("option", exp.var(self._prev.text.upper())) 6807 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6808 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6809 elif self._match_text_seq("LOCATION"): 6810 alter_set.set("location", self._parse_field()) 6811 elif self._match_text_seq("ACCESS", "METHOD"): 6812 alter_set.set("access_method", self._parse_field()) 6813 elif self._match_text_seq("TABLESPACE"): 6814 alter_set.set("tablespace", self._parse_field()) 6815 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6816 alter_set.set("file_format", [self._parse_field()]) 6817 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6818 alter_set.set("file_format", self._parse_wrapped_options()) 6819 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6820 alter_set.set("copy_options", self._parse_wrapped_options()) 6821 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6822 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6823 else: 6824 if self._match_text_seq("SERDE"): 6825 alter_set.set("serde", self._parse_field()) 6826 6827 alter_set.set("expressions", [self._parse_properties()]) 6828 6829 return 
alter_set 6830 6831 def _parse_alter(self) -> exp.Alter | exp.Command: 6832 start = self._prev 6833 6834 alter_token = self._match_set(self.ALTERABLES) and self._prev 6835 if not alter_token: 6836 return self._parse_as_command(start) 6837 6838 exists = self._parse_exists() 6839 only = self._match_text_seq("ONLY") 6840 this = self._parse_table(schema=True) 6841 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6842 6843 if self._next: 6844 self._advance() 6845 6846 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6847 if parser: 6848 actions = ensure_list(parser(self)) 6849 not_valid = self._match_text_seq("NOT", "VALID") 6850 options = self._parse_csv(self._parse_property) 6851 6852 if not self._curr and actions: 6853 return self.expression( 6854 exp.Alter, 6855 this=this, 6856 kind=alter_token.text.upper(), 6857 exists=exists, 6858 actions=actions, 6859 only=only, 6860 options=options, 6861 cluster=cluster, 6862 not_valid=not_valid, 6863 ) 6864 6865 return self._parse_as_command(start) 6866 6867 def _parse_merge(self) -> exp.Merge: 6868 self._match(TokenType.INTO) 6869 target = self._parse_table() 6870 6871 if target and self._match(TokenType.ALIAS, advance=False): 6872 target.set("alias", self._parse_table_alias()) 6873 6874 self._match(TokenType.USING) 6875 using = self._parse_table() 6876 6877 self._match(TokenType.ON) 6878 on = self._parse_assignment() 6879 6880 return self.expression( 6881 exp.Merge, 6882 this=target, 6883 using=using, 6884 on=on, 6885 expressions=self._parse_when_matched(), 6886 returning=self._parse_returning(), 6887 ) 6888 6889 def _parse_when_matched(self) -> t.List[exp.When]: 6890 whens = [] 6891 6892 while self._match(TokenType.WHEN): 6893 matched = not self._match(TokenType.NOT) 6894 self._match_text_seq("MATCHED") 6895 source = ( 6896 False 6897 if self._match_text_seq("BY", "TARGET") 6898 else self._match_text_seq("BY", "SOURCE") 6899 ) 6900 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6901 6902 self._match(TokenType.THEN) 6903 6904 if self._match(TokenType.INSERT): 6905 this = self._parse_star() 6906 if this: 6907 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6908 else: 6909 then = self.expression( 6910 exp.Insert, 6911 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6912 expression=self._match_text_seq("VALUES") and self._parse_value(), 6913 ) 6914 elif self._match(TokenType.UPDATE): 6915 expressions = self._parse_star() 6916 if expressions: 6917 then = self.expression(exp.Update, expressions=expressions) 6918 else: 6919 then = self.expression( 6920 exp.Update, 6921 expressions=self._match(TokenType.SET) 6922 and self._parse_csv(self._parse_equality), 6923 ) 6924 elif self._match(TokenType.DELETE): 6925 then = self.expression(exp.Var, this=self._prev.text) 6926 else: 6927 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6928 6929 whens.append( 6930 self.expression( 6931 exp.When, 6932 matched=matched, 6933 source=source, 6934 condition=condition, 6935 then=then, 6936 ) 6937 ) 6938 return whens 6939 6940 def _parse_show(self) -> t.Optional[exp.Expression]: 6941 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6942 if parser: 6943 return parser(self) 6944 return self._parse_as_command(self._prev) 6945 6946 def _parse_set_item_assignment( 6947 self, kind: t.Optional[str] = None 6948 ) -> t.Optional[exp.Expression]: 6949 index = self._index 6950 6951 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 6952 return self._parse_set_transaction(global_=kind == "GLOBAL") 6953 6954 left = self._parse_primary() or self._parse_column() 6955 assignment_delimiter = self._match_texts(("=", "TO")) 6956 6957 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6958 self._retreat(index) 6959 return None 6960 6961 right = self._parse_statement() or self._parse_id_var() 6962 if isinstance(right, (exp.Column, exp.Identifier)): 6963 right = exp.var(right.name) 6964 6965 this = self.expression(exp.EQ, this=left, expression=right) 6966 return self.expression(exp.SetItem, this=this, kind=kind) 6967 6968 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6969 self._match_text_seq("TRANSACTION") 6970 characteristics = self._parse_csv( 6971 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6972 ) 6973 return self.expression( 6974 exp.SetItem, 6975 expressions=characteristics, 6976 kind="TRANSACTION", 6977 **{"global": global_}, # type: ignore 6978 ) 6979 6980 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6981 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6982 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6983 6984 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6985 index = self._index 6986 set_ = self.expression( 6987 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6988 ) 6989 6990 if self._curr: 6991 self._retreat(index) 6992 return self._parse_as_command(self._prev) 6993 6994 return set_ 6995 6996 def _parse_var_from_options( 6997 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6998 ) -> t.Optional[exp.Var]: 6999 start = self._curr 7000 if not start: 7001 return None 7002 7003 option = start.text.upper() 7004 continuations = options.get(option) 7005 7006 index = self._index 7007 self._advance() 7008 for keywords in continuations or []: 7009 if isinstance(keywords, str): 7010 keywords = (keywords,) 7011 7012 if self._match_text_seq(*keywords): 7013 option = f"{option} {' '.join(keywords)}" 7014 break 7015 else: 7016 if continuations or continuations is None: 7017 if raise_unmatched: 7018 self.raise_error(f"Unknown option {option}") 7019 7020 self._retreat(index) 7021 return None 7022 7023 return exp.var(option) 7024 7025 def _parse_as_command(self, start: Token) -> exp.Command: 7026 while self._curr: 7027 self._advance() 7028 text = self._find_sql(start, self._prev) 7029 size = len(start.text) 7030 self._warn_unsupported() 7031 return exp.Command(this=text[:size], expression=text[size:]) 7032 7033 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7034 settings = [] 7035 7036 self._match_l_paren() 7037 kind = self._parse_id_var() 7038 7039 if self._match(TokenType.L_PAREN): 7040 while True: 7041 key = self._parse_id_var() 7042 value = self._parse_primary() 7043 7044 if not key and value is None: 7045 break 7046 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7047 self._match(TokenType.R_PAREN) 7048 7049 self._match_r_paren() 7050 7051 return self.expression( 7052 exp.DictProperty, 7053 this=this, 7054 kind=kind.this if kind else None, 7055 settings=settings, 7056 ) 7057 7058 def _parse_dict_range(self, this: str) -> exp.DictRange: 7059 self._match_l_paren() 7060 has_min = self._match_text_seq("MIN") 7061 if has_min: 7062 min = self._parse_var() or self._parse_primary() 7063 self._match_text_seq("MAX") 7064 max = 
self._parse_var() or self._parse_primary() 7065 else: 7066 max = self._parse_var() or self._parse_primary() 7067 min = exp.Literal.number(0) 7068 self._match_r_paren() 7069 return self.expression(exp.DictRange, this=this, min=min, max=max) 7070 7071 def _parse_comprehension( 7072 self, this: t.Optional[exp.Expression] 7073 ) -> t.Optional[exp.Comprehension]: 7074 index = self._index 7075 expression = self._parse_column() 7076 if not self._match(TokenType.IN): 7077 self._retreat(index - 1) 7078 return None 7079 iterator = self._parse_column() 7080 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7081 return self.expression( 7082 exp.Comprehension, 7083 this=this, 7084 expression=expression, 7085 iterator=iterator, 7086 condition=condition, 7087 ) 7088 7089 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7090 if self._match(TokenType.HEREDOC_STRING): 7091 return self.expression(exp.Heredoc, this=self._prev.text) 7092 7093 if not self._match_text_seq("$"): 7094 return None 7095 7096 tags = ["$"] 7097 tag_text = None 7098 7099 if self._is_connected(): 7100 self._advance() 7101 tags.append(self._prev.text.upper()) 7102 else: 7103 self.raise_error("No closing $ found") 7104 7105 if tags[-1] != "$": 7106 if self._is_connected() and self._match_text_seq("$"): 7107 tag_text = tags[-1] 7108 tags.append("$") 7109 else: 7110 self.raise_error("No closing $ found") 7111 7112 heredoc_start = self._curr 7113 7114 while self._curr: 7115 if self._match_text_seq(*tags, advance=False): 7116 this = self._find_sql(heredoc_start, self._prev) 7117 self._advance(len(tags)) 7118 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7119 7120 self._advance() 7121 7122 self.raise_error(f"No closing {''.join(tags)} found") 7123 return None 7124 7125 def _find_parser( 7126 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7127 ) -> t.Optional[t.Callable]: 7128 if not self._curr: 7129 return None 7130 7131 index = self._index 7132 this = [] 7133 while True: 7134 # The current token might be multiple words 7135 curr = self._curr.text.upper() 7136 key = curr.split(" ") 7137 this.append(curr) 7138 7139 self._advance() 7140 result, trie = in_trie(trie, key) 7141 if result == TrieResult.FAILED: 7142 break 7143 7144 if result == TrieResult.EXISTS: 7145 subparser = parsers[" ".join(this)] 7146 return subparser 7147 7148 self._retreat(index) 7149 return None 7150 7151 def _match(self, token_type, advance=True, expression=None): 7152 if not self._curr: 7153 return None 7154 7155 if self._curr.token_type == token_type: 7156 if advance: 7157 self._advance() 7158 self._add_comments(expression) 7159 return True 7160 7161 return None 7162 7163 def _match_set(self, types, advance=True): 7164 if not self._curr: 7165 return None 7166 7167 if self._curr.token_type in types: 7168 if advance: 7169 self._advance() 7170 return True 7171 7172 return None 7173 7174 def _match_pair(self, token_type_a, token_type_b, advance=True): 7175 if not self._curr or not self._next: 7176 return None 7177 7178 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7179 if advance: 7180 self._advance(2) 7181 return True 7182 7183 return None 7184 7185 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7186 if not self._match(TokenType.L_PAREN, expression=expression): 7187 self.raise_error("Expecting (") 7188 7189 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7190 if not self._match(TokenType.R_PAREN, expression=expression): 
7191 self.raise_error("Expecting )") 7192 7193 def _match_texts(self, texts, advance=True): 7194 if ( 7195 self._curr 7196 and self._curr.token_type != TokenType.STRING 7197 and self._curr.text.upper() in texts 7198 ): 7199 if advance: 7200 self._advance() 7201 return True 7202 return None 7203 7204 def _match_text_seq(self, *texts, advance=True): 7205 index = self._index 7206 for text in texts: 7207 if ( 7208 self._curr 7209 and self._curr.token_type != TokenType.STRING 7210 and self._curr.text.upper() == text 7211 ): 7212 self._advance() 7213 else: 7214 self._retreat(index) 7215 return None 7216 7217 if not advance: 7218 self._retreat(index) 7219 7220 return True 7221 7222 def _replace_lambda( 7223 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7224 ) -> t.Optional[exp.Expression]: 7225 if not node: 7226 return node 7227 7228 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7229 7230 for column in node.find_all(exp.Column): 7231 typ = lambda_types.get(column.parts[0].name) 7232 if typ is not None: 7233 dot_or_id = column.to_dot() if column.table else column.this 7234 7235 if typ: 7236 dot_or_id = self.expression( 7237 exp.Cast, 7238 this=dot_or_id, 7239 to=typ, 7240 ) 7241 7242 parent = column.parent 7243 7244 while isinstance(parent, exp.Dot): 7245 if not isinstance(parent.parent, exp.Dot): 7246 parent.replace(dot_or_id) 7247 break 7248 parent = parent.parent 7249 else: 7250 if column is node: 7251 node = dot_or_id 7252 else: 7253 column.replace(dot_or_id) 7254 return node 7255 7256 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7257 start = self._prev 7258 7259 # Not to be confused with TRUNCATE(number, decimals) function call 7260 if self._match(TokenType.L_PAREN): 7261 self._retreat(self._index - 2) 7262 return self._parse_function() 7263 7264 # Clickhouse supports TRUNCATE DATABASE as well 7265 is_database = self._match(TokenType.DATABASE) 7266 7267 self._match(TokenType.TABLE) 7268 7269 exists = self._parse_exists(not_=False) 7270 7271 expressions = self._parse_csv( 7272 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7273 ) 7274 7275 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7276 7277 if self._match_text_seq("RESTART", "IDENTITY"): 7278 identity = "RESTART" 7279 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7280 identity = "CONTINUE" 7281 else: 7282 identity = None 7283 7284 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7285 option = self._prev.text 7286 else: 7287 option = None 7288 7289 partition = self._parse_partition() 7290 7291 # Fallback case 7292 if self._curr: 7293 return self._parse_as_command(start) 7294 7295 return self.expression( 7296 exp.TruncateTable, 7297 expressions=expressions, 7298 is_database=is_database, 7299 exists=exists, 7300 cluster=cluster, 7301 identity=identity, 7302 option=option, 7303 partition=partition, 7304 ) 7305 7306 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7307 this = self._parse_ordered(self._parse_opclass) 7308 7309 if not self._match(TokenType.WITH): 7310 return this 7311 7312 op = self._parse_var(any_token=True) 7313 7314 return self.expression(exp.WithOperator, this=this, op=op) 7315 7316 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7317 self._match(TokenType.EQ) 7318 self._match(TokenType.L_PAREN) 7319 7320 opts: t.List[t.Optional[exp.Expression]] = [] 7321 while self._curr and not self._match(TokenType.R_PAREN): 7322 if 
self._match_text_seq("FORMAT_NAME", "="): 7323 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7324 # so we parse it separately to use _parse_field() 7325 prop = self.expression( 7326 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7327 ) 7328 opts.append(prop) 7329 else: 7330 opts.append(self._parse_property()) 7331 7332 self._match(TokenType.COMMA) 7333 7334 return opts 7335 7336 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7337 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7338 7339 options = [] 7340 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7341 option = self._parse_var(any_token=True) 7342 prev = self._prev.text.upper() 7343 7344 # Different dialects might separate options and values by white space, "=" and "AS" 7345 self._match(TokenType.EQ) 7346 self._match(TokenType.ALIAS) 7347 7348 param = self.expression(exp.CopyParameter, this=option) 7349 7350 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7351 TokenType.L_PAREN, advance=False 7352 ): 7353 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7354 param.set("expressions", self._parse_wrapped_options()) 7355 elif prev == "FILE_FORMAT": 7356 # T-SQL's external file format case 7357 param.set("expression", self._parse_field()) 7358 else: 7359 param.set("expression", self._parse_unquoted_field()) 7360 7361 options.append(param) 7362 self._match(sep) 7363 7364 return options 7365 7366 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7367 expr = self.expression(exp.Credentials) 7368 7369 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7370 expr.set("storage", self._parse_field()) 7371 if self._match_text_seq("CREDENTIALS"): 7372 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7373 creds = ( 7374 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7375 ) 7376 expr.set("credentials", creds) 7377 if self._match_text_seq("ENCRYPTION"): 7378 expr.set("encryption", self._parse_wrapped_options()) 7379 if self._match_text_seq("IAM_ROLE"): 7380 expr.set("iam_role", self._parse_field()) 7381 if self._match_text_seq("REGION"): 7382 expr.set("region", self._parse_field()) 7383 7384 return expr 7385 7386 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7387 return self._parse_field() 7388 7389 def _parse_copy(self) -> exp.Copy | exp.Command: 7390 start = self._prev 7391 7392 self._match(TokenType.INTO) 7393 7394 this = ( 7395 self._parse_select(nested=True, parse_subquery_alias=False) 7396 if self._match(TokenType.L_PAREN, advance=False) 7397 else self._parse_table(schema=True) 7398 ) 7399 7400 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7401 7402 files = self._parse_csv(self._parse_file_location) 7403 credentials = self._parse_credentials() 7404 7405 self._match_text_seq("WITH") 7406 7407 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7408 7409 # Fallback case 7410 if self._curr: 7411 return self._parse_as_command(start) 7412 7413 return self.expression( 7414 exp.Copy, 7415 this=this, 7416 kind=kind, 7417 credentials=credentials, 7418 files=files, 7419 params=params, 7420 ) 7421 7422 def _parse_normalize(self) -> exp.Normalize: 7423 return self.expression( 7424 exp.Normalize, 7425 this=self._parse_bitwise(), 7426 form=self._match(TokenType.COMMA) and self._parse_var(), 7427 ) 7428 7429 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7430 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7431 this = self._parse_function() 7432 if isinstance(this, exp.Columns): 7433 this.set("unpack", True) 7434 return this 7435 7436 return self.expression( 7437 exp.Star, 7438 **{ # type: ignore 7439 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7440 "replace": self._parse_star_op("REPLACE"), 7441 "rename": self._parse_star_op("RENAME"), 7442 }, 7443 ) 7444 7445 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7446 privilege_parts = [] 7447 7448 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7449 # (end of privilege list) or L_PAREN (start of column list) are met 7450 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7451 privilege_parts.append(self._curr.text.upper()) 7452 self._advance() 7453 7454 this = exp.var(" ".join(privilege_parts)) 7455 expressions = ( 7456 self._parse_wrapped_csv(self._parse_column) 7457 if self._match(TokenType.L_PAREN, advance=False) 7458 else None 7459 ) 7460 7461 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7462 7463 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7464 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7465 principal = self._parse_id_var() 7466 7467 if not principal: 7468 return None 7469 7470 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7471 7472 def _parse_grant(self) -> exp.Grant | exp.Command: 7473 start = self._prev 7474 7475 privileges = self._parse_csv(self._parse_grant_privilege) 7476 7477 self._match(TokenType.ON) 7478 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7479 7480 # Attempt to parse the securable e.g. MySQL allows names 7481 # such as "foo.*", "*.*" which are not easily parseable yet 7482 securable = self._try_parse(self._parse_table_parts) 7483 7484 if not securable or not self._match_text_seq("TO"): 7485 return self._parse_as_command(start) 7486 7487 principals = self._parse_csv(self._parse_grant_principal) 7488 7489 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7490 7491 if self._curr: 7492 return self._parse_as_command(start) 7493 7494 return self.expression( 7495 exp.Grant, 7496 privileges=privileges, 7497 kind=kind, 7498 securable=securable, 7499 principals=principals, 7500 grant_option=grant_option, 7501 ) 7502 7503 def _parse_overlay(self) -> exp.Overlay: 7504 return self.expression( 7505 exp.Overlay, 7506 **{ # type: ignore 7507 "this": self._parse_bitwise(), 7508 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7509 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7510 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7511 }, 7512 )
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
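build_var_map backs the VAR_MAP entry of Parser.FUNCTIONS, pairing up a flat argument list into keys and values. A small sketch of calling it directly:

from sqlglot import exp
from sqlglot.parser import build_var_map

# Alternating key/value arguments become an exp.VarMap.
node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
assert isinstance(node, exp.VarMap)

# A single star argument is special-cased into an exp.StarMap.
star = build_var_map([exp.Star()])
assert isinstance(star, exp.StarMap)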
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
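binary_range_parser is the factory behind most RANGE_PARSERS entries (GLOB, ILIKE, RLIKE and friends): it returns a parser callable that reads the right-hand side with _parse_bitwise and optionally swaps the operands. A sketch of how a Parser subclass might register one; exp.Glob is already wired up this way by default:

from sqlglot import exp
from sqlglot.parser import Parser, binary_range_parser
from sqlglot.tokens import TokenType

class MyParser(Parser):
    # Extend the inherited mapping rather than replacing it.
    RANGE_PARSERS = {
        **Parser.RANGE_PARSERS,
        TokenType.GLOB: binary_range_parser(exp.Glob),
    }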
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
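build_logarithm normalizes LOG across dialects: with two arguments the operand order follows dialect.LOG_BASE_FIRST, and a one-argument LOG becomes exp.Ln when the dialect's parser sets LOG_DEFAULTS_TO_LN. A direct-call sketch; the dialect name is just an example:

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_logarithm

dialect = Dialect.get_or_raise("duckdb")
node = build_logarithm([exp.Literal.number(2), exp.Literal.number(8)], dialect)
assert isinstance(node, exp.Log)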
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
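This factory produces the builders registered for JSON_EXTRACT, JSON_EXTRACT_SCALAR and JSON_EXTRACT_PATH_TEXT in Parser.FUNCTIONS; the path argument is normalized through dialect.to_json_path rather than kept as a raw string. An illustrative round trip in the default dialect:

import sqlglot

# The second argument is parsed as a structured JSON path.
ast = sqlglot.parse_one("SELECT JSON_EXTRACT(col, '$.a.b')")
print(ast.sql())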
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
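build_mod exists because MOD(x, y) is transpiled to the % operator, which binds tighter than + or -; the exp.Paren wrapping keeps the function-call grouping intact. For example, in the default dialect:

import sqlglot

# Without the wrapping this would round-trip as a + 1 % 7, which parses differently.
print(sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql())  # SELECT (a + 1) % 7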
121def build_array_constructor( 122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 123) -> exp.Expression: 124 array_exp = exp_class(expressions=args) 125 126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 128 129 return array_exp
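build_array_constructor records which bracket style produced an array literal, but only for dialects whose HAS_DISTINCT_ARRAY_CONSTRUCTORS flag marks ARRAY(...) and [...] as semantically distinct. A direct-call sketch; the dialect is illustrative:

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

node = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,
    Dialect.get_or_raise("duckdb"),
)
assert isinstance(node, exp.Array)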
132def build_convert_timezone( 133 args: t.List, default_source_tz: t.Optional[str] = None 134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 135 if len(args) == 2: 136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 137 return exp.ConvertTimezone( 138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 139 ) 140 141 return exp.ConvertTimezone.from_arg_list(args)
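build_convert_timezone lets a dialect supply an implicit source timezone for the two-argument CONVERT_TIMEZONE form; with three arguments it defers to exp.ConvertTimezone.from_arg_list. A sketch with an assumed "UTC" default:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",  # illustrative default a dialect might pass
)
assert node.args["source_tz"].this == "UTC"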
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.UDECIMAL, 356 TokenType.BIGDECIMAL, 357 TokenType.UUID, 358 TokenType.GEOGRAPHY, 359 TokenType.GEOMETRY, 360 TokenType.HLLSKETCH, 361 TokenType.HSTORE, 362 TokenType.PSEUDO_TYPE, 363 TokenType.SUPER, 364 TokenType.SERIAL, 365 TokenType.SMALLSERIAL, 366 TokenType.BIGSERIAL, 367 TokenType.XML, 368 TokenType.YEAR, 369 TokenType.UNIQUEIDENTIFIER, 370 TokenType.USERDEFINED, 371 TokenType.MONEY, 372 TokenType.SMALLMONEY, 373 TokenType.ROWVERSION, 374 TokenType.IMAGE, 375 TokenType.VARIANT, 376 TokenType.VECTOR, 377 TokenType.OBJECT, 378 TokenType.OBJECT_IDENTIFIER, 379 TokenType.INET, 380 TokenType.IPADDRESS, 381 TokenType.IPPREFIX, 382 TokenType.IPV4, 383 TokenType.IPV6, 384 TokenType.UNKNOWN, 385 TokenType.NULL, 386 TokenType.NAME, 387 TokenType.TDIGEST, 388 *ENUM_TYPE_TOKENS, 389 *NESTED_TYPE_TOKENS, 390 *AGGREGATE_TYPE_TOKENS, 391 } 392 393 
SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 394 TokenType.BIGINT: TokenType.UBIGINT, 395 TokenType.INT: TokenType.UINT, 396 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 397 TokenType.SMALLINT: TokenType.USMALLINT, 398 TokenType.TINYINT: TokenType.UTINYINT, 399 TokenType.DECIMAL: TokenType.UDECIMAL, 400 } 401 402 SUBQUERY_PREDICATES = { 403 TokenType.ANY: exp.Any, 404 TokenType.ALL: exp.All, 405 TokenType.EXISTS: exp.Exists, 406 TokenType.SOME: exp.Any, 407 } 408 409 RESERVED_TOKENS = { 410 *Tokenizer.SINGLE_TOKENS.values(), 411 TokenType.SELECT, 412 } - {TokenType.IDENTIFIER} 413 414 DB_CREATABLES = { 415 TokenType.DATABASE, 416 TokenType.DICTIONARY, 417 TokenType.MODEL, 418 TokenType.SCHEMA, 419 TokenType.SEQUENCE, 420 TokenType.STORAGE_INTEGRATION, 421 TokenType.TABLE, 422 TokenType.TAG, 423 TokenType.VIEW, 424 TokenType.WAREHOUSE, 425 TokenType.STREAMLIT, 426 } 427 428 CREATABLES = { 429 TokenType.COLUMN, 430 TokenType.CONSTRAINT, 431 TokenType.FOREIGN_KEY, 432 TokenType.FUNCTION, 433 TokenType.INDEX, 434 TokenType.PROCEDURE, 435 *DB_CREATABLES, 436 } 437 438 ALTERABLES = { 439 TokenType.INDEX, 440 TokenType.TABLE, 441 TokenType.VIEW, 442 } 443 444 # Tokens that can represent identifiers 445 ID_VAR_TOKENS = { 446 TokenType.ALL, 447 TokenType.VAR, 448 TokenType.ANTI, 449 TokenType.APPLY, 450 TokenType.ASC, 451 TokenType.ASOF, 452 TokenType.AUTO_INCREMENT, 453 TokenType.BEGIN, 454 TokenType.BPCHAR, 455 TokenType.CACHE, 456 TokenType.CASE, 457 TokenType.COLLATE, 458 TokenType.COMMAND, 459 TokenType.COMMENT, 460 TokenType.COMMIT, 461 TokenType.CONSTRAINT, 462 TokenType.COPY, 463 TokenType.CUBE, 464 TokenType.DEFAULT, 465 TokenType.DELETE, 466 TokenType.DESC, 467 TokenType.DESCRIBE, 468 TokenType.DICTIONARY, 469 TokenType.DIV, 470 TokenType.END, 471 TokenType.EXECUTE, 472 TokenType.ESCAPE, 473 TokenType.FALSE, 474 TokenType.FIRST, 475 TokenType.FILTER, 476 TokenType.FINAL, 477 TokenType.FORMAT, 478 TokenType.FULL, 479 TokenType.IDENTIFIER, 480 TokenType.IS, 481 TokenType.ISNULL, 482 TokenType.INTERVAL, 483 TokenType.KEEP, 484 TokenType.KILL, 485 TokenType.LEFT, 486 TokenType.LOAD, 487 TokenType.MERGE, 488 TokenType.NATURAL, 489 TokenType.NEXT, 490 TokenType.OFFSET, 491 TokenType.OPERATOR, 492 TokenType.ORDINALITY, 493 TokenType.OVERLAPS, 494 TokenType.OVERWRITE, 495 TokenType.PARTITION, 496 TokenType.PERCENT, 497 TokenType.PIVOT, 498 TokenType.PRAGMA, 499 TokenType.RANGE, 500 TokenType.RECURSIVE, 501 TokenType.REFERENCES, 502 TokenType.REFRESH, 503 TokenType.RENAME, 504 TokenType.REPLACE, 505 TokenType.RIGHT, 506 TokenType.ROLLUP, 507 TokenType.ROW, 508 TokenType.ROWS, 509 TokenType.SEMI, 510 TokenType.SET, 511 TokenType.SETTINGS, 512 TokenType.SHOW, 513 TokenType.TEMPORARY, 514 TokenType.TOP, 515 TokenType.TRUE, 516 TokenType.TRUNCATE, 517 TokenType.UNIQUE, 518 TokenType.UNNEST, 519 TokenType.UNPIVOT, 520 TokenType.UPDATE, 521 TokenType.USE, 522 TokenType.VOLATILE, 523 TokenType.WINDOW, 524 *CREATABLES, 525 *SUBQUERY_PREDICATES, 526 *TYPE_TOKENS, 527 *NO_PAREN_FUNCTIONS, 528 } 529 ID_VAR_TOKENS.remove(TokenType.UNION) 530 531 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 532 533 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 534 TokenType.ANTI, 535 TokenType.APPLY, 536 TokenType.ASOF, 537 TokenType.FULL, 538 TokenType.LEFT, 539 TokenType.LOCK, 540 TokenType.NATURAL, 541 TokenType.OFFSET, 542 TokenType.RIGHT, 543 TokenType.SEMI, 544 TokenType.WINDOW, 545 } 546 547 ALIAS_TOKENS = ID_VAR_TOKENS 548 549 ARRAY_CONSTRUCTORS = { 550 "ARRAY": exp.Array, 551 "LIST": exp.List, 552 } 553 554 COMMENT_TABLE_ALIAS_TOKENS 
= TABLE_ALIAS_TOKENS - {TokenType.IS} 555 556 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 557 558 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 559 560 FUNC_TOKENS = { 561 TokenType.COLLATE, 562 TokenType.COMMAND, 563 TokenType.CURRENT_DATE, 564 TokenType.CURRENT_DATETIME, 565 TokenType.CURRENT_TIMESTAMP, 566 TokenType.CURRENT_TIME, 567 TokenType.CURRENT_USER, 568 TokenType.FILTER, 569 TokenType.FIRST, 570 TokenType.FORMAT, 571 TokenType.GLOB, 572 TokenType.IDENTIFIER, 573 TokenType.INDEX, 574 TokenType.ISNULL, 575 TokenType.ILIKE, 576 TokenType.INSERT, 577 TokenType.LIKE, 578 TokenType.MERGE, 579 TokenType.OFFSET, 580 TokenType.PRIMARY_KEY, 581 TokenType.RANGE, 582 TokenType.REPLACE, 583 TokenType.RLIKE, 584 TokenType.ROW, 585 TokenType.UNNEST, 586 TokenType.VAR, 587 TokenType.LEFT, 588 TokenType.RIGHT, 589 TokenType.SEQUENCE, 590 TokenType.DATE, 591 TokenType.DATETIME, 592 TokenType.TABLE, 593 TokenType.TIMESTAMP, 594 TokenType.TIMESTAMPTZ, 595 TokenType.TRUNCATE, 596 TokenType.WINDOW, 597 TokenType.XOR, 598 *TYPE_TOKENS, 599 *SUBQUERY_PREDICATES, 600 } 601 602 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 603 TokenType.AND: exp.And, 604 } 605 606 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 607 TokenType.COLON_EQ: exp.PropertyEQ, 608 } 609 610 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 611 TokenType.OR: exp.Or, 612 } 613 614 EQUALITY = { 615 TokenType.EQ: exp.EQ, 616 TokenType.NEQ: exp.NEQ, 617 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 618 } 619 620 COMPARISON = { 621 TokenType.GT: exp.GT, 622 TokenType.GTE: exp.GTE, 623 TokenType.LT: exp.LT, 624 TokenType.LTE: exp.LTE, 625 } 626 627 BITWISE = { 628 TokenType.AMP: exp.BitwiseAnd, 629 TokenType.CARET: exp.BitwiseXor, 630 TokenType.PIPE: exp.BitwiseOr, 631 } 632 633 TERM = { 634 TokenType.DASH: exp.Sub, 635 TokenType.PLUS: exp.Add, 636 TokenType.MOD: exp.Mod, 637 TokenType.COLLATE: exp.Collate, 638 } 639 640 FACTOR = { 641 TokenType.DIV: exp.IntDiv, 642 TokenType.LR_ARROW: exp.Distance, 643 TokenType.SLASH: exp.Div, 644 TokenType.STAR: exp.Mul, 645 } 646 647 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 648 649 TIMES = { 650 TokenType.TIME, 651 TokenType.TIMETZ, 652 } 653 654 TIMESTAMPS = { 655 TokenType.TIMESTAMP, 656 TokenType.TIMESTAMPTZ, 657 TokenType.TIMESTAMPLTZ, 658 *TIMES, 659 } 660 661 SET_OPERATIONS = { 662 TokenType.UNION, 663 TokenType.INTERSECT, 664 TokenType.EXCEPT, 665 } 666 667 JOIN_METHODS = { 668 TokenType.ASOF, 669 TokenType.NATURAL, 670 TokenType.POSITIONAL, 671 } 672 673 JOIN_SIDES = { 674 TokenType.LEFT, 675 TokenType.RIGHT, 676 TokenType.FULL, 677 } 678 679 JOIN_KINDS = { 680 TokenType.ANTI, 681 TokenType.CROSS, 682 TokenType.INNER, 683 TokenType.OUTER, 684 TokenType.SEMI, 685 TokenType.STRAIGHT_JOIN, 686 } 687 688 JOIN_HINTS: t.Set[str] = set() 689 690 LAMBDAS = { 691 TokenType.ARROW: lambda self, expressions: self.expression( 692 exp.Lambda, 693 this=self._replace_lambda( 694 self._parse_assignment(), 695 expressions, 696 ), 697 expressions=expressions, 698 ), 699 TokenType.FARROW: lambda self, expressions: self.expression( 700 exp.Kwarg, 701 this=exp.var(expressions[0].name), 702 expression=self._parse_assignment(), 703 ), 704 } 705 706 COLUMN_OPERATORS = { 707 TokenType.DOT: None, 708 TokenType.DCOLON: lambda self, this, to: self.expression( 709 exp.Cast if self.STRICT_CAST else exp.TryCast, 710 this=this, 711 to=to, 712 ), 713 TokenType.ARROW: lambda self, this, path: self.expression( 714 exp.JSONExtract, 715 this=this, 716 
expression=self.dialect.to_json_path(path), 717 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 718 ), 719 TokenType.DARROW: lambda self, this, path: self.expression( 720 exp.JSONExtractScalar, 721 this=this, 722 expression=self.dialect.to_json_path(path), 723 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 724 ), 725 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 726 exp.JSONBExtract, 727 this=this, 728 expression=path, 729 ), 730 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 731 exp.JSONBExtractScalar, 732 this=this, 733 expression=path, 734 ), 735 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 736 exp.JSONBContains, 737 this=this, 738 expression=key, 739 ), 740 } 741 742 EXPRESSION_PARSERS = { 743 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 744 exp.Column: lambda self: self._parse_column(), 745 exp.Condition: lambda self: self._parse_assignment(), 746 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 747 exp.Expression: lambda self: self._parse_expression(), 748 exp.From: lambda self: self._parse_from(joins=True), 749 exp.Group: lambda self: self._parse_group(), 750 exp.Having: lambda self: self._parse_having(), 751 exp.Identifier: lambda self: self._parse_id_var(), 752 exp.Join: lambda self: self._parse_join(), 753 exp.Lambda: lambda self: self._parse_lambda(), 754 exp.Lateral: lambda self: self._parse_lateral(), 755 exp.Limit: lambda self: self._parse_limit(), 756 exp.Offset: lambda self: self._parse_offset(), 757 exp.Order: lambda self: self._parse_order(), 758 exp.Ordered: lambda self: self._parse_ordered(), 759 exp.Properties: lambda self: self._parse_properties(), 760 exp.Qualify: lambda self: self._parse_qualify(), 761 exp.Returning: lambda self: self._parse_returning(), 762 exp.Select: lambda self: self._parse_select(), 763 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 764 exp.Table: lambda self: self._parse_table_parts(), 765 exp.TableAlias: lambda self: self._parse_table_alias(), 766 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 767 exp.Where: lambda self: self._parse_where(), 768 exp.Window: lambda self: self._parse_named_window(), 769 exp.With: lambda self: self._parse_with(), 770 "JOIN_TYPE": lambda self: self._parse_join_parts(), 771 } 772 773 STATEMENT_PARSERS = { 774 TokenType.ALTER: lambda self: self._parse_alter(), 775 TokenType.BEGIN: lambda self: self._parse_transaction(), 776 TokenType.CACHE: lambda self: self._parse_cache(), 777 TokenType.COMMENT: lambda self: self._parse_comment(), 778 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 779 TokenType.COPY: lambda self: self._parse_copy(), 780 TokenType.CREATE: lambda self: self._parse_create(), 781 TokenType.DELETE: lambda self: self._parse_delete(), 782 TokenType.DESC: lambda self: self._parse_describe(), 783 TokenType.DESCRIBE: lambda self: self._parse_describe(), 784 TokenType.DROP: lambda self: self._parse_drop(), 785 TokenType.GRANT: lambda self: self._parse_grant(), 786 TokenType.INSERT: lambda self: self._parse_insert(), 787 TokenType.KILL: lambda self: self._parse_kill(), 788 TokenType.LOAD: lambda self: self._parse_load(), 789 TokenType.MERGE: lambda self: self._parse_merge(), 790 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 791 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 792 TokenType.REFRESH: lambda self: self._parse_refresh(), 793 TokenType.ROLLBACK: lambda 
self: self._parse_commit_or_rollback(), 794 TokenType.SET: lambda self: self._parse_set(), 795 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 796 TokenType.UNCACHE: lambda self: self._parse_uncache(), 797 TokenType.UPDATE: lambda self: self._parse_update(), 798 TokenType.USE: lambda self: self.expression( 799 exp.Use, 800 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 801 this=self._parse_table(schema=False), 802 ), 803 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 804 } 805 806 UNARY_PARSERS = { 807 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 808 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 809 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 810 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 811 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 812 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 813 } 814 815 STRING_PARSERS = { 816 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 817 exp.RawString, this=token.text 818 ), 819 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 820 exp.National, this=token.text 821 ), 822 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 823 TokenType.STRING: lambda self, token: self.expression( 824 exp.Literal, this=token.text, is_string=True 825 ), 826 TokenType.UNICODE_STRING: lambda self, token: self.expression( 827 exp.UnicodeString, 828 this=token.text, 829 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 830 ), 831 } 832 833 NUMERIC_PARSERS = { 834 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 835 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 836 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 837 TokenType.NUMBER: lambda self, token: self.expression( 838 exp.Literal, this=token.text, is_string=False 839 ), 840 } 841 842 PRIMARY_PARSERS = { 843 **STRING_PARSERS, 844 **NUMERIC_PARSERS, 845 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 846 TokenType.NULL: lambda self, _: self.expression(exp.Null), 847 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 848 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 849 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 850 TokenType.STAR: lambda self, _: self._parse_star_ops(), 851 } 852 853 PLACEHOLDER_PARSERS = { 854 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 855 TokenType.PARAMETER: lambda self: self._parse_parameter(), 856 TokenType.COLON: lambda self: ( 857 self.expression(exp.Placeholder, this=self._prev.text) 858 if self._match_set(self.ID_VAR_TOKENS) 859 else None 860 ), 861 } 862 863 RANGE_PARSERS = { 864 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 865 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 866 TokenType.GLOB: binary_range_parser(exp.Glob), 867 TokenType.ILIKE: binary_range_parser(exp.ILike), 868 TokenType.IN: lambda self, this: self._parse_in(this), 869 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 870 TokenType.IS: lambda self, this: self._parse_is(this), 871 TokenType.LIKE: binary_range_parser(exp.Like), 872 TokenType.LT_AT: 
binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 873 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 874 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 875 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 876 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 877 } 878 879 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 880 "ALLOWED_VALUES": lambda self: self.expression( 881 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 882 ), 883 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 884 "AUTO": lambda self: self._parse_auto_property(), 885 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 886 "BACKUP": lambda self: self.expression( 887 exp.BackupProperty, this=self._parse_var(any_token=True) 888 ), 889 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 890 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 891 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 892 "CHECKSUM": lambda self: self._parse_checksum(), 893 "CLUSTER BY": lambda self: self._parse_cluster(), 894 "CLUSTERED": lambda self: self._parse_clustered_by(), 895 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 896 exp.CollateProperty, **kwargs 897 ), 898 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 899 "CONTAINS": lambda self: self._parse_contains_property(), 900 "COPY": lambda self: self._parse_copy_property(), 901 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 902 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 903 "DEFINER": lambda self: self._parse_definer(), 904 "DETERMINISTIC": lambda self: self.expression( 905 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 906 ), 907 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 908 "DUPLICATE": lambda self: self._parse_duplicate(), 909 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 910 "DISTKEY": lambda self: self._parse_distkey(), 911 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 912 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 913 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 914 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 915 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 916 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 917 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 918 "FREESPACE": lambda self: self._parse_freespace(), 919 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 920 "HEAP": lambda self: self.expression(exp.HeapProperty), 921 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 922 "IMMUTABLE": lambda self: self.expression( 923 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 924 ), 925 "INHERITS": lambda self: self.expression( 926 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 927 ), 928 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 929 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 930 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 931 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 932 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 933 
"LIKE": lambda self: self._parse_create_like(), 934 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 935 "LOCK": lambda self: self._parse_locking(), 936 "LOCKING": lambda self: self._parse_locking(), 937 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 938 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 939 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 940 "MODIFIES": lambda self: self._parse_modifies_property(), 941 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 942 "NO": lambda self: self._parse_no_property(), 943 "ON": lambda self: self._parse_on_property(), 944 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 945 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 946 "PARTITION": lambda self: self._parse_partitioned_of(), 947 "PARTITION BY": lambda self: self._parse_partitioned_by(), 948 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 949 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 950 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 951 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 952 "READS": lambda self: self._parse_reads_property(), 953 "REMOTE": lambda self: self._parse_remote_with_connection(), 954 "RETURNS": lambda self: self._parse_returns(), 955 "STRICT": lambda self: self.expression(exp.StrictProperty), 956 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 957 "ROW": lambda self: self._parse_row(), 958 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 959 "SAMPLE": lambda self: self.expression( 960 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 961 ), 962 "SECURE": lambda self: self.expression(exp.SecureProperty), 963 "SECURITY": lambda self: self._parse_security(), 964 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 965 "SETTINGS": lambda self: self._parse_settings_property(), 966 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 967 "SORTKEY": lambda self: self._parse_sortkey(), 968 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 969 "STABLE": lambda self: self.expression( 970 exp.StabilityProperty, this=exp.Literal.string("STABLE") 971 ), 972 "STORED": lambda self: self._parse_stored(), 973 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 974 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 975 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 976 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 977 "TO": lambda self: self._parse_to_table(), 978 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 979 "TRANSFORM": lambda self: self.expression( 980 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 981 ), 982 "TTL": lambda self: self._parse_ttl(), 983 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 984 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 985 "VOLATILE": lambda self: self._parse_volatile_property(), 986 "WITH": lambda self: self._parse_with_property(), 987 } 988 989 CONSTRAINT_PARSERS = { 990 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 991 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 992 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 993 
"CHARACTER SET": lambda self: self.expression( 994 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 995 ), 996 "CHECK": lambda self: self.expression( 997 exp.CheckColumnConstraint, 998 this=self._parse_wrapped(self._parse_assignment), 999 enforced=self._match_text_seq("ENFORCED"), 1000 ), 1001 "COLLATE": lambda self: self.expression( 1002 exp.CollateColumnConstraint, 1003 this=self._parse_identifier() or self._parse_column(), 1004 ), 1005 "COMMENT": lambda self: self.expression( 1006 exp.CommentColumnConstraint, this=self._parse_string() 1007 ), 1008 "COMPRESS": lambda self: self._parse_compress(), 1009 "CLUSTERED": lambda self: self.expression( 1010 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1011 ), 1012 "NONCLUSTERED": lambda self: self.expression( 1013 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1014 ), 1015 "DEFAULT": lambda self: self.expression( 1016 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1017 ), 1018 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1019 "EPHEMERAL": lambda self: self.expression( 1020 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1021 ), 1022 "EXCLUDE": lambda self: self.expression( 1023 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1024 ), 1025 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1026 "FORMAT": lambda self: self.expression( 1027 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1028 ), 1029 "GENERATED": lambda self: self._parse_generated_as_identity(), 1030 "IDENTITY": lambda self: self._parse_auto_increment(), 1031 "INLINE": lambda self: self._parse_inline(), 1032 "LIKE": lambda self: self._parse_create_like(), 1033 "NOT": lambda self: self._parse_not_constraint(), 1034 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1035 "ON": lambda self: ( 1036 self._match(TokenType.UPDATE) 1037 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1038 ) 1039 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1040 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1041 "PERIOD": lambda self: self._parse_period_for_system_time(), 1042 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1043 "REFERENCES": lambda self: self._parse_references(match=False), 1044 "TITLE": lambda self: self.expression( 1045 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1046 ), 1047 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1048 "UNIQUE": lambda self: self._parse_unique(), 1049 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1050 "WITH": lambda self: self.expression( 1051 exp.Properties, expressions=self._parse_wrapped_properties() 1052 ), 1053 } 1054 1055 ALTER_PARSERS = { 1056 "ADD": lambda self: self._parse_alter_table_add(), 1057 "ALTER": lambda self: self._parse_alter_table_alter(), 1058 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1059 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1060 "DROP": lambda self: self._parse_alter_table_drop(), 1061 "RENAME": lambda self: self._parse_alter_table_rename(), 1062 "SET": lambda self: self._parse_alter_table_set(), 1063 "AS": lambda self: self._parse_select(), 1064 } 1065 1066 ALTER_ALTER_PARSERS = { 1067 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1068 "DISTSTYLE": lambda self: 
self._parse_alter_diststyle(), 1069 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1070 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1071 } 1072 1073 SCHEMA_UNNAMED_CONSTRAINTS = { 1074 "CHECK", 1075 "EXCLUDE", 1076 "FOREIGN KEY", 1077 "LIKE", 1078 "PERIOD", 1079 "PRIMARY KEY", 1080 "UNIQUE", 1081 } 1082 1083 NO_PAREN_FUNCTION_PARSERS = { 1084 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1085 "CASE": lambda self: self._parse_case(), 1086 "CONNECT_BY_ROOT": lambda self: self.expression( 1087 exp.ConnectByRoot, this=self._parse_column() 1088 ), 1089 "IF": lambda self: self._parse_if(), 1090 "NEXT": lambda self: self._parse_next_value_for(), 1091 } 1092 1093 INVALID_FUNC_NAME_TOKENS = { 1094 TokenType.IDENTIFIER, 1095 TokenType.STRING, 1096 } 1097 1098 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1099 1100 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1101 1102 FUNCTION_PARSERS = { 1103 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1104 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1105 "DECODE": lambda self: self._parse_decode(), 1106 "EXTRACT": lambda self: self._parse_extract(), 1107 "GAP_FILL": lambda self: self._parse_gap_fill(), 1108 "JSON_OBJECT": lambda self: self._parse_json_object(), 1109 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1110 "JSON_TABLE": lambda self: self._parse_json_table(), 1111 "MATCH": lambda self: self._parse_match_against(), 1112 "NORMALIZE": lambda self: self._parse_normalize(), 1113 "OPENJSON": lambda self: self._parse_open_json(), 1114 "OVERLAY": lambda self: self._parse_overlay(), 1115 "POSITION": lambda self: self._parse_position(), 1116 "PREDICT": lambda self: self._parse_predict(), 1117 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1118 "STRING_AGG": lambda self: self._parse_string_agg(), 1119 "SUBSTRING": lambda self: self._parse_substring(), 1120 "TRIM": lambda self: self._parse_trim(), 1121 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1122 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1123 } 1124 1125 QUERY_MODIFIER_PARSERS = { 1126 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1127 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1128 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1129 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1130 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1131 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1132 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1133 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1134 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1135 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1136 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1137 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1138 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1139 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1140 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1141 TokenType.CLUSTER_BY: lambda self: ( 1142 "cluster", 1143 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1144 ), 1145 TokenType.DISTRIBUTE_BY: lambda self: ( 1146 "distribute", 1147 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1148 ), 
1149 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1150 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1151 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1152 } 1153 1154 SET_PARSERS = { 1155 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1156 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1157 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1158 "TRANSACTION": lambda self: self._parse_set_transaction(), 1159 } 1160 1161 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1162 1163 TYPE_LITERAL_PARSERS = { 1164 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1165 } 1166 1167 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1168 1169 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1170 1171 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1172 1173 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1174 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1175 "ISOLATION": ( 1176 ("LEVEL", "REPEATABLE", "READ"), 1177 ("LEVEL", "READ", "COMMITTED"), 1178 ("LEVEL", "READ", "UNCOMMITTED"), 1179 ("LEVEL", "SERIALIZABLE"), 1180 ), 1181 "READ": ("WRITE", "ONLY"), 1182 } 1183 1184 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1185 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1186 ) 1187 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1188 1189 CREATE_SEQUENCE: OPTIONS_TYPE = { 1190 "SCALE": ("EXTEND", "NOEXTEND"), 1191 "SHARD": ("EXTEND", "NOEXTEND"), 1192 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1193 **dict.fromkeys( 1194 ( 1195 "SESSION", 1196 "GLOBAL", 1197 "KEEP", 1198 "NOKEEP", 1199 "ORDER", 1200 "NOORDER", 1201 "NOCACHE", 1202 "CYCLE", 1203 "NOCYCLE", 1204 "NOMINVALUE", 1205 "NOMAXVALUE", 1206 "NOSCALE", 1207 "NOSHARD", 1208 ), 1209 tuple(), 1210 ), 1211 } 1212 1213 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1214 1215 USABLES: OPTIONS_TYPE = dict.fromkeys( 1216 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1217 ) 1218 1219 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1220 1221 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1222 "TYPE": ("EVOLUTION",), 1223 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1224 } 1225 1226 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1227 "NOT": ("ENFORCED",), 1228 "MATCH": ( 1229 "FULL", 1230 "PARTIAL", 1231 "SIMPLE", 1232 ), 1233 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1234 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1235 } 1236 1237 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1238 1239 CLONE_KEYWORDS = {"CLONE", "COPY"} 1240 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1241 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1242 1243 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1244 1245 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1246 1247 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1248 1249 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1250 1251 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1252 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1253 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1254 1255 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1256 1257 
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False
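    # Illustrative sketch (not part of this module): dialects customize parsing
    # by subclassing Parser and overriding the class-level flags above. A
    # hypothetical dialect that supports string aliases and defaults single-arg
    # LOG to LN might look like:
    #
    #     class MyDialectParser(Parser):
    #         STRING_ALIASES = True       # allow SELECT COUNT(*) 'count'
    #         LOG_DEFAULTS_TO_LN = True   # LOG(x) parses as LN(x)
    #
    # MyDialectParser is a hypothetical name; real dialects define a nested
    # Parser class on their Dialect subclass.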
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
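    # Illustrative sketch (not part of this module): driving parse() and
    # parse_into() directly. Assumes the default dialect tokenizer and that the
    # target type is registered in EXPRESSION_PARSERS, as exp.Condition is by
    # default.
    #
    #     from sqlglot import exp
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "SELECT a FROM t; SELECT b FROM u"
    #     tokens = Tokenizer().tokenize(sql)
    #     trees = Parser().parse(tokens, sql)  # one tree per ;-separated statement
    #
    #     tokens = Tokenizer().tokenize("x = 1")
    #     condition = Parser().parse_into(exp.Condition, tokens, "x = 1")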
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
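    # Illustrative note (not part of this module): expression() is the single
    # construction point for AST nodes, so every node picks up pending comments
    # and is validated against its class's arg_types. A node built without a
    # mandatory argument surfaces through raise_error():
    #
    #     from sqlglot import exp
    #     from sqlglot.parser import Parser
    #
    #     Parser().expression(exp.Not)  # exp.Not requires "this"; with the
    #                                   # default ErrorLevel.IMMEDIATE this
    #                                   # raises ParseError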
    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
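    # Illustrative sketch (not part of this module): the save/retreat idiom
    # that _try_parse wraps. Speculative parsing records the token index,
    # attempts a sub-parse, and rewinds on failure:
    #
    #     index = self._index
    #     this = self._parse_something_optional()  # hypothetical helper
    #     if not this:
    #         self._retreat(index)                 # rewind; tokens are untouched
    #
    # _try_parse additionally forces ErrorLevel.IMMEDIATE so failures inside
    # parse_method surface as ParseError instead of being recorded, then
    # restores the caller's error level.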
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
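    # Illustrative sketch (not part of this module): the shape _parse_drop
    # builds, via the high-level sqlglot.parse_one helper.
    #
    #     import sqlglot
    #
    #     drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
    #     # drop is an exp.Drop with kind="TABLE", exists=True, cascade=True,
    #     # and this= the parsed db.t table parts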
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
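    # Illustrative sketch (not part of this module): the shape _parse_create
    # produces for a simple DDL statement, via the high-level API.
    #
    #     import sqlglot
    #
    #     create = sqlglot.parse_one("CREATE TABLE IF NOT EXISTS t (x INT)")
    #     # create is an exp.Create with kind="TABLE", exists=True, and
    #     # this= an exp.Schema wrapping the table and its column definitions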
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
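    # Illustrative sketch (not part of this module): the properties parsed out
    # of a CREATE SEQUENCE statement.
    #
    #     import sqlglot
    #
    #     seq = sqlglot.parse_one("CREATE SEQUENCE s START WITH 10 INCREMENT BY 2")
    #     # the options land in an exp.SequenceProperties node with start=10
    #     # and increment=2 among the Create's properties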
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)
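    # Illustrative note (not part of this module): the generic key = value
    # branch above turns unrecognized options into exp.Property nodes, e.g.
    # inside a WITH (...) property list:
    #
    #     import sqlglot
    #
    #     ddl = sqlglot.parse_one("CREATE TABLE t (x INT) WITH (fillfactor = 70)")
    #     # the option becomes exp.Property(this=exp.var("fillfactor"), value=70)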
    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )
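    # Illustrative sketch (not part of this module): the Doris/StarRocks-style
    # clause this parser accepts.
    #
    #     DISTRIBUTED BY HASH (k1, k2) BUCKETS 10
    #     DISTRIBUTED BY RANDOM BUCKETS AUTO
    #
    # The first form yields exp.DistributedByProperty(kind="HASH",
    # expressions=[k1, k2], buckets=10); the second yields kind="RANDOM" with
    # buckets left unset (AUTO).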
    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
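    # Illustrative sketch (not part of this module): the MySQL clause handled
    # by _parse_definer.
    #
    #     CREATE DEFINER = `admin`@`localhost` VIEW v AS SELECT 1
    #
    # The user and host identifiers are folded back into a single
    # exp.DefinerProperty(this="admin@localhost").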
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
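    # Illustrative sketch (not part of this module): a Teradata locking
    # modifier of the form accepted above.
    #
    #     LOCKING TABLE t FOR ACCESS
    #
    # parses into exp.LockingProperty(kind="TABLE", this=t, for_or_in="FOR",
    # lock_type="ACCESS").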
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
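    # Illustrative sketch (not part of this module): the Postgres partition
    # syntax the two parsers above cover.
    #
    #     CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)
    #     CREATE TABLE p PARTITION OF t DEFAULT
    #
    # The first form produces an exp.PartitionedOfProperty whose expression is
    # an exp.PartitionBoundSpec with from_expressions=[1] and
    # to_expressions=[10]; the second uses exp.var("DEFAULT") instead.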
    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
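    # Illustrative sketch (not part of this module): an Oracle-style multitable
    # insert, roughly of the form
    #
    #     INSERT ALL
    #         WHEN x > 0 THEN INTO t1 VALUES (x)
    #         ELSE INTO t2 VALUES (x)
    #     ...
    #
    # Each WHEN/ELSE branch becomes an exp.ConditionalInsert wrapping an
    # exp.Insert, and the trailing row source is attached to the resulting
    # exp.MultitableInserts node as `source`.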
    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
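    # Illustrative sketch (not part of this module): the upsert clauses handled
    # by _parse_on_conflict.
    #
    #     INSERT INTO t (id, v) VALUES (1, 2)
    #     ON CONFLICT (id) DO UPDATE SET v = excluded.v
    #
    # yields an exp.OnConflict with conflict_keys=[id], the DO UPDATE action,
    # and expressions=[v = excluded.v]; MySQL's ON DUPLICATE KEY UPDATE takes
    # the same path with duplicate=True.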
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
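    # Illustrative sketch (not part of this module): the Hive clause covered by
    # _parse_row_format.
    #
    #     ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
    #
    # produces exp.RowFormatDelimitedProperty(fields=',', lines='\n'), while
    # ROW FORMAT SERDE '<serde class>' produces an exp.RowFormatSerdeProperty.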
    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None
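    # Illustrative sketch (not part of this module): the comment above, made
    # concrete. A parenthesized row and a bare expression both become tuples:
    #
    #     VALUES (1, 2)   ->  one exp.Tuple with two expressions (1 row, 2 cols)
    #     VALUES 1, 2     ->  two single-item exp.Tuple rows (2 rows, 1 col)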
exp.Select, 2973 kind=kind, 2974 hint=hint, 2975 distinct=distinct, 2976 expressions=projections, 2977 limit=limit, 2978 operation_modifiers=operation_modifiers or None, 2979 ) 2980 this.comments = comments 2981 2982 into = self._parse_into() 2983 if into: 2984 this.set("into", into) 2985 2986 if not from_: 2987 from_ = self._parse_from() 2988 2989 if from_: 2990 this.set("from", from_) 2991 2992 this = self._parse_query_modifiers(this) 2993 elif (table or nested) and self._match(TokenType.L_PAREN): 2994 if self._match(TokenType.PIVOT): 2995 this = self._parse_simplified_pivot() 2996 elif self._match(TokenType.FROM): 2997 this = exp.select("*").from_( 2998 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2999 ) 3000 else: 3001 this = ( 3002 self._parse_table() 3003 if table 3004 else self._parse_select(nested=True, parse_set_operation=False) 3005 ) 3006 3007 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3008 # in case a modifier (e.g. join) is following 3009 if table and isinstance(this, exp.Values) and this.alias: 3010 alias = this.args["alias"].pop() 3011 this = exp.Table(this=this, alias=alias) 3012 3013 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3014 3015 self._match_r_paren() 3016 3017 # We return early here so that the UNION isn't attached to the subquery by the 3018 # following call to _parse_set_operations, but instead becomes the parent node 3019 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3020 elif self._match(TokenType.VALUES, advance=False): 3021 this = self._parse_derived_table_values() 3022 elif from_: 3023 this = exp.select("*").from_(from_.this, copy=False) 3024 elif self._match(TokenType.SUMMARIZE): 3025 table = self._match(TokenType.TABLE) 3026 this = self._parse_select() or self._parse_string() or self._parse_table() 3027 return self.expression(exp.Summarize, this=this, table=table) 3028 elif self._match(TokenType.DESCRIBE): 3029 this = self._parse_describe() 3030 elif self._match_text_seq("STREAM"): 3031 this = self.expression(exp.Stream, this=self._parse_function()) 3032 else: 3033 this = None 3034 3035 return self._parse_set_operations(this) if parse_set_operation else this 3036 3037 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3038 if not skip_with_token and not self._match(TokenType.WITH): 3039 return None 3040 3041 comments = self._prev_comments 3042 recursive = self._match(TokenType.RECURSIVE) 3043 3044 last_comments = None 3045 expressions = [] 3046 while True: 3047 expressions.append(self._parse_cte()) 3048 if last_comments: 3049 expressions[-1].add_comments(last_comments) 3050 3051 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3052 break 3053 else: 3054 self._match(TokenType.WITH) 3055 3056 last_comments = self._prev_comments 3057 3058 return self.expression( 3059 exp.With, comments=comments, expressions=expressions, recursive=recursive 3060 ) 3061 3062 def _parse_cte(self) -> exp.CTE: 3063 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3064 if not alias or not alias.this: 3065 self.raise_error("Expected CTE to have alias") 3066 3067 self._match(TokenType.ALIAS) 3068 comments = self._prev_comments 3069 3070 if self._match_text_seq("NOT", "MATERIALIZED"): 3071 materialized = False 3072 elif self._match_text_seq("MATERIALIZED"): 3073 materialized = True 3074 else: 3075 materialized = None 3076 3077 return self.expression( 3078 exp.CTE, 3079 this=self._parse_wrapped(self._parse_statement), 3080 alias=alias, 3081 
materialized=materialized, 3082 comments=comments, 3083 ) 3084 3085 def _parse_table_alias( 3086 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3087 ) -> t.Optional[exp.TableAlias]: 3088 any_token = self._match(TokenType.ALIAS) 3089 alias = ( 3090 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3091 or self._parse_string_as_identifier() 3092 ) 3093 3094 index = self._index 3095 if self._match(TokenType.L_PAREN): 3096 columns = self._parse_csv(self._parse_function_parameter) 3097 self._match_r_paren() if columns else self._retreat(index) 3098 else: 3099 columns = None 3100 3101 if not alias and not columns: 3102 return None 3103 3104 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3105 3106 # We bubble up comments from the Identifier to the TableAlias 3107 if isinstance(alias, exp.Identifier): 3108 table_alias.add_comments(alias.pop_comments()) 3109 3110 return table_alias 3111 3112 def _parse_subquery( 3113 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3114 ) -> t.Optional[exp.Subquery]: 3115 if not this: 3116 return None 3117 3118 return self.expression( 3119 exp.Subquery, 3120 this=this, 3121 pivots=self._parse_pivots(), 3122 alias=self._parse_table_alias() if parse_alias else None, 3123 sample=self._parse_table_sample(), 3124 ) 3125 3126 def _implicit_unnests_to_explicit(self, this: E) -> E: 3127 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3128 3129 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3130 for i, join in enumerate(this.args.get("joins") or []): 3131 table = join.this 3132 normalized_table = table.copy() 3133 normalized_table.meta["maybe_column"] = True 3134 normalized_table = _norm(normalized_table, dialect=self.dialect) 3135 3136 if isinstance(table, exp.Table) and not join.args.get("on"): 3137 if normalized_table.parts[0].name in refs: 3138 table_as_column = table.to_column() 3139 unnest = exp.Unnest(expressions=[table_as_column]) 3140 3141 # Table.to_column creates a parent Alias node that we want to convert to 3142 # a TableAlias and attach to the Unnest, so it matches the parser's output 3143 if isinstance(table.args.get("alias"), exp.TableAlias): 3144 table_as_column.replace(table_as_column.this) 3145 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3146 3147 table.replace(unnest) 3148 3149 refs.add(normalized_table.alias_or_name) 3150 3151 return this 3152 3153 def _parse_query_modifiers( 3154 self, this: t.Optional[exp.Expression] 3155 ) -> t.Optional[exp.Expression]: 3156 if isinstance(this, (exp.Query, exp.Table)): 3157 for join in self._parse_joins(): 3158 this.append("joins", join) 3159 for lateral in iter(self._parse_lateral, None): 3160 this.append("laterals", lateral) 3161 3162 while True: 3163 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3164 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3165 key, expression = parser(self) 3166 3167 if expression: 3168 this.set(key, expression) 3169 if key == "limit": 3170 offset = expression.args.pop("offset", None) 3171 3172 if offset: 3173 offset = exp.Offset(expression=offset) 3174 this.set("offset", offset) 3175 3176 limit_by_expressions = expression.expressions 3177 expression.set("expressions", None) 3178 offset.set("expressions", limit_by_expressions) 3179 continue 3180 break 3181 3182 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3183 this = 
self._implicit_unnests_to_explicit(this) 3184 3185 return this 3186 3187 def _parse_hint(self) -> t.Optional[exp.Hint]: 3188 if self._match(TokenType.HINT): 3189 hints = [] 3190 for hint in iter( 3191 lambda: self._parse_csv( 3192 lambda: self._parse_function() or self._parse_var(upper=True) 3193 ), 3194 [], 3195 ): 3196 hints.extend(hint) 3197 3198 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3199 self.raise_error("Expected */ after HINT") 3200 3201 return self.expression(exp.Hint, expressions=hints) 3202 3203 return None 3204 3205 def _parse_into(self) -> t.Optional[exp.Into]: 3206 if not self._match(TokenType.INTO): 3207 return None 3208 3209 temp = self._match(TokenType.TEMPORARY) 3210 unlogged = self._match_text_seq("UNLOGGED") 3211 self._match(TokenType.TABLE) 3212 3213 return self.expression( 3214 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3215 ) 3216 3217 def _parse_from( 3218 self, joins: bool = False, skip_from_token: bool = False 3219 ) -> t.Optional[exp.From]: 3220 if not skip_from_token and not self._match(TokenType.FROM): 3221 return None 3222 3223 return self.expression( 3224 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3225 ) 3226 3227 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3228 return self.expression( 3229 exp.MatchRecognizeMeasure, 3230 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3231 this=self._parse_expression(), 3232 ) 3233 3234 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3235 if not self._match(TokenType.MATCH_RECOGNIZE): 3236 return None 3237 3238 self._match_l_paren() 3239 3240 partition = self._parse_partition_by() 3241 order = self._parse_order() 3242 3243 measures = ( 3244 self._parse_csv(self._parse_match_recognize_measure) 3245 if self._match_text_seq("MEASURES") 3246 else None 3247 ) 3248 3249 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3250 rows = exp.var("ONE ROW PER MATCH") 3251 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3252 text = "ALL ROWS PER MATCH" 3253 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3254 text += " SHOW EMPTY MATCHES" 3255 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3256 text += " OMIT EMPTY MATCHES" 3257 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3258 text += " WITH UNMATCHED ROWS" 3259 rows = exp.var(text) 3260 else: 3261 rows = None 3262 3263 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3264 text = "AFTER MATCH SKIP" 3265 if self._match_text_seq("PAST", "LAST", "ROW"): 3266 text += " PAST LAST ROW" 3267 elif self._match_text_seq("TO", "NEXT", "ROW"): 3268 text += " TO NEXT ROW" 3269 elif self._match_text_seq("TO", "FIRST"): 3270 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3271 elif self._match_text_seq("TO", "LAST"): 3272 text += f" TO LAST {self._advance_any().text}" # type: ignore 3273 after = exp.var(text) 3274 else: 3275 after = None 3276 3277 if self._match_text_seq("PATTERN"): 3278 self._match_l_paren() 3279 3280 if not self._curr: 3281 self.raise_error("Expecting )", self._curr) 3282 3283 paren = 1 3284 start = self._curr 3285 3286 while self._curr and paren > 0: 3287 if self._curr.token_type == TokenType.L_PAREN: 3288 paren += 1 3289 if self._curr.token_type == TokenType.R_PAREN: 3290 paren -= 1 3291 3292 end = self._prev 3293 self._advance() 3294 3295 if paren > 0: 3296 self.raise_error("Expecting )", self._curr) 3297 3298 pattern = exp.var(self._find_sql(start, end)) 3299 
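# Example (illustrative sketch, not part of the parser source): the PATTERN body
# above is captured by balancing parentheses and storing the raw SQL slice, via
# _find_sql, as an exp.Var, so regex-style quantifiers survive round-tripping
# untouched. Assuming a reasonably recent sqlglot, something like this should work:
#
#     >>> import sqlglot
#     >>> sql = (
#     ...     "SELECT * FROM t MATCH_RECOGNIZE ("
#     ...     "PARTITION BY a ORDER BY b PATTERN (x+) DEFINE x AS c > 0)"
#     ... )
#     >>> node = sqlglot.parse_one(sql, read="snowflake")
#     >>> node.find(sqlglot.exp.MatchRecognize).args["pattern"]  # roughly Var(this='x+')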
else: 3300 pattern = None 3301 3302 define = ( 3303 self._parse_csv(self._parse_name_as_expression) 3304 if self._match_text_seq("DEFINE") 3305 else None 3306 ) 3307 3308 self._match_r_paren() 3309 3310 return self.expression( 3311 exp.MatchRecognize, 3312 partition_by=partition, 3313 order=order, 3314 measures=measures, 3315 rows=rows, 3316 after=after, 3317 pattern=pattern, 3318 define=define, 3319 alias=self._parse_table_alias(), 3320 ) 3321 3322 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3323 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3324 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3325 cross_apply = False 3326 3327 if cross_apply is not None: 3328 this = self._parse_select(table=True) 3329 view = None 3330 outer = None 3331 elif self._match(TokenType.LATERAL): 3332 this = self._parse_select(table=True) 3333 view = self._match(TokenType.VIEW) 3334 outer = self._match(TokenType.OUTER) 3335 else: 3336 return None 3337 3338 if not this: 3339 this = ( 3340 self._parse_unnest() 3341 or self._parse_function() 3342 or self._parse_id_var(any_token=False) 3343 ) 3344 3345 while self._match(TokenType.DOT): 3346 this = exp.Dot( 3347 this=this, 3348 expression=self._parse_function() or self._parse_id_var(any_token=False), 3349 ) 3350 3351 if view: 3352 table = self._parse_id_var(any_token=False) 3353 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3354 table_alias: t.Optional[exp.TableAlias] = self.expression( 3355 exp.TableAlias, this=table, columns=columns 3356 ) 3357 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3358 # We move the alias from the lateral's child node to the lateral itself 3359 table_alias = this.args["alias"].pop() 3360 else: 3361 table_alias = self._parse_table_alias() 3362 3363 return self.expression( 3364 exp.Lateral, 3365 this=this, 3366 view=view, 3367 outer=outer, 3368 alias=table_alias, 3369 cross_apply=cross_apply, 3370 ) 3371 3372 def _parse_join_parts( 3373 self, 3374 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3375 return ( 3376 self._match_set(self.JOIN_METHODS) and self._prev, 3377 self._match_set(self.JOIN_SIDES) and self._prev, 3378 self._match_set(self.JOIN_KINDS) and self._prev, 3379 ) 3380 3381 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3382 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3383 this = self._parse_column() 3384 if isinstance(this, exp.Column): 3385 return this.this 3386 return this 3387 3388 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3389 3390 def _parse_join( 3391 self, skip_join_token: bool = False, parse_bracket: bool = False 3392 ) -> t.Optional[exp.Join]: 3393 if self._match(TokenType.COMMA): 3394 return self.expression(exp.Join, this=self._parse_table()) 3395 3396 index = self._index 3397 method, side, kind = self._parse_join_parts() 3398 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3399 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3400 3401 if not skip_join_token and not join: 3402 self._retreat(index) 3403 kind = None 3404 method = None 3405 side = None 3406 3407 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3408 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3409 3410 if not skip_join_token and not join and not outer_apply and not cross_apply: 3411 return None 3412 3413 kwargs: t.Dict[str, t.Any] = 
{"this": self._parse_table(parse_bracket=parse_bracket)} 3414 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3415 kwargs["expressions"] = self._parse_csv( 3416 lambda: self._parse_table(parse_bracket=parse_bracket) 3417 ) 3418 3419 if method: 3420 kwargs["method"] = method.text 3421 if side: 3422 kwargs["side"] = side.text 3423 if kind: 3424 kwargs["kind"] = kind.text 3425 if hint: 3426 kwargs["hint"] = hint 3427 3428 if self._match(TokenType.MATCH_CONDITION): 3429 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3430 3431 if self._match(TokenType.ON): 3432 kwargs["on"] = self._parse_assignment() 3433 elif self._match(TokenType.USING): 3434 kwargs["using"] = self._parse_using_identifiers() 3435 elif ( 3436 not (outer_apply or cross_apply) 3437 and not isinstance(kwargs["this"], exp.Unnest) 3438 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3439 ): 3440 index = self._index 3441 joins: t.Optional[list] = list(self._parse_joins()) 3442 3443 if joins and self._match(TokenType.ON): 3444 kwargs["on"] = self._parse_assignment() 3445 elif joins and self._match(TokenType.USING): 3446 kwargs["using"] = self._parse_using_identifiers() 3447 else: 3448 joins = None 3449 self._retreat(index) 3450 3451 kwargs["this"].set("joins", joins if joins else None) 3452 3453 comments = [c for token in (method, side, kind) if token for c in token.comments] 3454 return self.expression(exp.Join, comments=comments, **kwargs) 3455 3456 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3457 this = self._parse_assignment() 3458 3459 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3460 return this 3461 3462 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3463 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3464 3465 return this 3466 3467 def _parse_index_params(self) -> exp.IndexParameters: 3468 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3469 3470 if self._match(TokenType.L_PAREN, advance=False): 3471 columns = self._parse_wrapped_csv(self._parse_with_operator) 3472 else: 3473 columns = None 3474 3475 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3476 partition_by = self._parse_partition_by() 3477 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3478 tablespace = ( 3479 self._parse_var(any_token=True) 3480 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3481 else None 3482 ) 3483 where = self._parse_where() 3484 3485 on = self._parse_field() if self._match(TokenType.ON) else None 3486 3487 return self.expression( 3488 exp.IndexParameters, 3489 using=using, 3490 columns=columns, 3491 include=include, 3492 partition_by=partition_by, 3493 where=where, 3494 with_storage=with_storage, 3495 tablespace=tablespace, 3496 on=on, 3497 ) 3498 3499 def _parse_index( 3500 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3501 ) -> t.Optional[exp.Index]: 3502 if index or anonymous: 3503 unique = None 3504 primary = None 3505 amp = None 3506 3507 self._match(TokenType.ON) 3508 self._match(TokenType.TABLE) # hive 3509 table = self._parse_table_parts(schema=True) 3510 else: 3511 unique = self._match(TokenType.UNIQUE) 3512 primary = self._match_text_seq("PRIMARY") 3513 amp = self._match_text_seq("AMP") 3514 3515 if not self._match(TokenType.INDEX): 3516 return None 3517 3518 index = self._parse_id_var() 3519 table = None 3520 3521 params = 
self._parse_index_params() 3522 3523 return self.expression( 3524 exp.Index, 3525 this=index, 3526 table=table, 3527 unique=unique, 3528 primary=primary, 3529 amp=amp, 3530 params=params, 3531 ) 3532 3533 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3534 hints: t.List[exp.Expression] = [] 3535 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3536 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3537 hints.append( 3538 self.expression( 3539 exp.WithTableHint, 3540 expressions=self._parse_csv( 3541 lambda: self._parse_function() or self._parse_var(any_token=True) 3542 ), 3543 ) 3544 ) 3545 self._match_r_paren() 3546 else: 3547 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3548 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3549 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3550 3551 self._match_set((TokenType.INDEX, TokenType.KEY)) 3552 if self._match(TokenType.FOR): 3553 hint.set("target", self._advance_any() and self._prev.text.upper()) 3554 3555 hint.set("expressions", self._parse_wrapped_id_vars()) 3556 hints.append(hint) 3557 3558 return hints or None 3559 3560 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3561 return ( 3562 (not schema and self._parse_function(optional_parens=False)) 3563 or self._parse_id_var(any_token=False) 3564 or self._parse_string_as_identifier() 3565 or self._parse_placeholder() 3566 ) 3567 3568 def _parse_table_parts( 3569 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3570 ) -> exp.Table: 3571 catalog = None 3572 db = None 3573 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3574 3575 while self._match(TokenType.DOT): 3576 if catalog: 3577 # This allows nesting the table in arbitrarily many dot expressions if needed 3578 table = self.expression( 3579 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3580 ) 3581 else: 3582 catalog = db 3583 db = table 3584 # "" used for tsql FROM a..b case 3585 table = self._parse_table_part(schema=schema) or "" 3586 3587 if ( 3588 wildcard 3589 and self._is_connected() 3590 and (isinstance(table, exp.Identifier) or not table) 3591 and self._match(TokenType.STAR) 3592 ): 3593 if isinstance(table, exp.Identifier): 3594 table.args["this"] += "*" 3595 else: 3596 table = exp.Identifier(this="*") 3597 3598 # We bubble up comments from the Identifier to the Table 3599 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3600 3601 if is_db_reference: 3602 catalog = db 3603 db = table 3604 table = None 3605 3606 if not table and not is_db_reference: 3607 self.raise_error(f"Expected table name but got {self._curr}") 3608 if not db and is_db_reference: 3609 self.raise_error(f"Expected database name but got {self._curr}") 3610 3611 table = self.expression( 3612 exp.Table, 3613 comments=comments, 3614 this=table, 3615 db=db, 3616 catalog=catalog, 3617 ) 3618 3619 changes = self._parse_changes() 3620 if changes: 3621 table.set("changes", changes) 3622 3623 at_before = self._parse_historical_data() 3624 if at_before: 3625 table.set("when", at_before) 3626 3627 pivots = self._parse_pivots() 3628 if pivots: 3629 table.set("pivots", pivots) 3630 3631 return table 3632 3633 def _parse_table( 3634 self, 3635 schema: bool = False, 3636 joins: bool = False, 3637 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3638 parse_bracket: bool = False, 3639 is_db_reference: bool = False, 3640 
parse_partition: bool = False, 3641 ) -> t.Optional[exp.Expression]: 3642 lateral = self._parse_lateral() 3643 if lateral: 3644 return lateral 3645 3646 unnest = self._parse_unnest() 3647 if unnest: 3648 return unnest 3649 3650 values = self._parse_derived_table_values() 3651 if values: 3652 return values 3653 3654 subquery = self._parse_select(table=True) 3655 if subquery: 3656 if not subquery.args.get("pivots"): 3657 subquery.set("pivots", self._parse_pivots()) 3658 return subquery 3659 3660 bracket = parse_bracket and self._parse_bracket(None) 3661 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3662 3663 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3664 self._parse_table 3665 ) 3666 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3667 3668 only = self._match(TokenType.ONLY) 3669 3670 this = t.cast( 3671 exp.Expression, 3672 bracket 3673 or rows_from 3674 or self._parse_bracket( 3675 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3676 ), 3677 ) 3678 3679 if only: 3680 this.set("only", only) 3681 3682 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3683 self._match_text_seq("*") 3684 3685 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3686 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3687 this.set("partition", self._parse_partition()) 3688 3689 if schema: 3690 return self._parse_schema(this=this) 3691 3692 version = self._parse_version() 3693 3694 if version: 3695 this.set("version", version) 3696 3697 if self.dialect.ALIAS_POST_TABLESAMPLE: 3698 this.set("sample", self._parse_table_sample()) 3699 3700 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3701 if alias: 3702 this.set("alias", alias) 3703 3704 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3705 return self.expression( 3706 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3707 ) 3708 3709 this.set("hints", self._parse_table_hints()) 3710 3711 if not this.args.get("pivots"): 3712 this.set("pivots", self._parse_pivots()) 3713 3714 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3715 this.set("sample", self._parse_table_sample()) 3716 3717 if joins: 3718 for join in self._parse_joins(): 3719 this.append("joins", join) 3720 3721 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3722 this.set("ordinality", True) 3723 this.set("alias", self._parse_table_alias()) 3724 3725 return this 3726 3727 def _parse_version(self) -> t.Optional[exp.Version]: 3728 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3729 this = "TIMESTAMP" 3730 elif self._match(TokenType.VERSION_SNAPSHOT): 3731 this = "VERSION" 3732 else: 3733 return None 3734 3735 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3736 kind = self._prev.text.upper() 3737 start = self._parse_bitwise() 3738 self._match_texts(("TO", "AND")) 3739 end = self._parse_bitwise() 3740 expression: t.Optional[exp.Expression] = self.expression( 3741 exp.Tuple, expressions=[start, end] 3742 ) 3743 elif self._match_text_seq("CONTAINED", "IN"): 3744 kind = "CONTAINED IN" 3745 expression = self.expression( 3746 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3747 ) 3748 elif self._match(TokenType.ALL): 3749 kind = "ALL" 3750 expression = None 3751 else: 3752 self._match_text_seq("AS", "OF") 3753 kind = "AS OF" 3754 expression = self._parse_type() 3755 3756 return 
self.expression(exp.Version, this=this, expression=expression, kind=kind) 3757 3758 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3759 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3760 index = self._index 3761 historical_data = None 3762 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3763 this = self._prev.text.upper() 3764 kind = ( 3765 self._match(TokenType.L_PAREN) 3766 and self._match_texts(self.HISTORICAL_DATA_KIND) 3767 and self._prev.text.upper() 3768 ) 3769 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3770 3771 if expression: 3772 self._match_r_paren() 3773 historical_data = self.expression( 3774 exp.HistoricalData, this=this, kind=kind, expression=expression 3775 ) 3776 else: 3777 self._retreat(index) 3778 3779 return historical_data 3780 3781 def _parse_changes(self) -> t.Optional[exp.Changes]: 3782 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3783 return None 3784 3785 information = self._parse_var(any_token=True) 3786 self._match_r_paren() 3787 3788 return self.expression( 3789 exp.Changes, 3790 information=information, 3791 at_before=self._parse_historical_data(), 3792 end=self._parse_historical_data(), 3793 ) 3794 3795 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3796 if not self._match(TokenType.UNNEST): 3797 return None 3798 3799 expressions = self._parse_wrapped_csv(self._parse_equality) 3800 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3801 3802 alias = self._parse_table_alias() if with_alias else None 3803 3804 if alias: 3805 if self.dialect.UNNEST_COLUMN_ONLY: 3806 if alias.args.get("columns"): 3807 self.raise_error("Unexpected extra column alias in unnest.") 3808 3809 alias.set("columns", [alias.this]) 3810 alias.set("this", None) 3811 3812 columns = alias.args.get("columns") or [] 3813 if offset and len(expressions) < len(columns): 3814 offset = columns.pop() 3815 3816 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3817 self._match(TokenType.ALIAS) 3818 offset = self._parse_id_var( 3819 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3820 ) or exp.to_identifier("offset") 3821 3822 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3823 3824 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3825 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3826 if not is_derived and not ( 3827 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3828 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3829 ): 3830 return None 3831 3832 expressions = self._parse_csv(self._parse_value) 3833 alias = self._parse_table_alias() 3834 3835 if is_derived: 3836 self._match_r_paren() 3837 3838 return self.expression( 3839 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3840 ) 3841 3842 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3843 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3844 as_modifier and self._match_text_seq("USING", "SAMPLE") 3845 ): 3846 return None 3847 3848 bucket_numerator = None 3849 bucket_denominator = None 3850 bucket_field = None 3851 percent = None 3852 size = None 3853 seed = None 3854 3855 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3856 matched_l_paren = self._match(TokenType.L_PAREN) 3857 3858 if self.TABLESAMPLE_CSV: 3859 num = None 3860 expressions = self._parse_csv(self._parse_primary) 3861 else: 
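# Example (hedged sketch, not part of the parser source): in the non-CSV branch
# below, the single numeric argument lands on exp.TableSample as either "percent"
# or "size", depending on the dialect's TABLESAMPLE_SIZE_IS_PERCENT setting.
# Assuming a reasonably recent sqlglot:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE BERNOULLI (10)", read="postgres")
#     >>> sample = q.find(sqlglot.exp.TableSample)
#     >>> sample.args.get("method"), sample.args.get("percent") or sample.args.get("size")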
3862 expressions = None 3863 num = ( 3864 self._parse_factor() 3865 if self._match(TokenType.NUMBER, advance=False) 3866 else self._parse_primary() or self._parse_placeholder() 3867 ) 3868 3869 if self._match_text_seq("BUCKET"): 3870 bucket_numerator = self._parse_number() 3871 self._match_text_seq("OUT", "OF") 3872 bucket_denominator = self._parse_number() 3873 self._match(TokenType.ON) 3874 bucket_field = self._parse_field() 3875 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3876 percent = num 3877 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3878 size = num 3879 else: 3880 percent = num 3881 3882 if matched_l_paren: 3883 self._match_r_paren() 3884 3885 if self._match(TokenType.L_PAREN): 3886 method = self._parse_var(upper=True) 3887 seed = self._match(TokenType.COMMA) and self._parse_number() 3888 self._match_r_paren() 3889 elif self._match_texts(("SEED", "REPEATABLE")): 3890 seed = self._parse_wrapped(self._parse_number) 3891 3892 if not method and self.DEFAULT_SAMPLING_METHOD: 3893 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3894 3895 return self.expression( 3896 exp.TableSample, 3897 expressions=expressions, 3898 method=method, 3899 bucket_numerator=bucket_numerator, 3900 bucket_denominator=bucket_denominator, 3901 bucket_field=bucket_field, 3902 percent=percent, 3903 size=size, 3904 seed=seed, 3905 ) 3906 3907 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3908 return list(iter(self._parse_pivot, None)) or None 3909 3910 def _parse_joins(self) -> t.Iterator[exp.Join]: 3911 return iter(self._parse_join, None) 3912 3913 # https://duckdb.org/docs/sql/statements/pivot 3914 def _parse_simplified_pivot(self) -> exp.Pivot: 3915 def _parse_on() -> t.Optional[exp.Expression]: 3916 this = self._parse_bitwise() 3917 return self._parse_in(this) if self._match(TokenType.IN) else this 3918 3919 this = self._parse_table() 3920 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3921 using = self._match(TokenType.USING) and self._parse_csv( 3922 lambda: self._parse_alias(self._parse_function()) 3923 ) 3924 group = self._parse_group() 3925 return self.expression( 3926 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3927 ) 3928 3929 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3930 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3931 this = self._parse_select_or_expression() 3932 3933 self._match(TokenType.ALIAS) 3934 alias = self._parse_bitwise() 3935 if alias: 3936 if isinstance(alias, exp.Column) and not alias.db: 3937 alias = alias.this 3938 return self.expression(exp.PivotAlias, this=this, alias=alias) 3939 3940 return this 3941 3942 value = self._parse_column() 3943 3944 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3945 self.raise_error("Expecting IN (") 3946 3947 if self._match(TokenType.ANY): 3948 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3949 else: 3950 exprs = self._parse_csv(_parse_aliased_expression) 3951 3952 self._match_r_paren() 3953 return self.expression(exp.In, this=value, expressions=exprs) 3954 3955 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3956 index = self._index 3957 include_nulls = None 3958 3959 if self._match(TokenType.PIVOT): 3960 unpivot = False 3961 elif self._match(TokenType.UNPIVOT): 3962 unpivot = True 3963 3964 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3965 if self._match_text_seq("INCLUDE", "NULLS"): 
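# Example (illustrative sketch, not part of the parser source): a typical pivot
# ends up as an exp.Pivot whose "expressions" hold the aggregation list and whose
# "field" is the exp.In built by _parse_pivot_in above. Assuming a reasonably
# recent sqlglot:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one(
#     ...     "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('JAN', 'FEB'))",
#     ...     read="snowflake",
#     ... )
#     >>> pivot = q.find(sqlglot.exp.Pivot)
#     >>> [e.sql() for e in pivot.expressions], pivot.args["field"].this.sql()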
3966 include_nulls = True 3967 elif self._match_text_seq("EXCLUDE", "NULLS"): 3968 include_nulls = False 3969 else: 3970 return None 3971 3972 expressions = [] 3973 3974 if not self._match(TokenType.L_PAREN): 3975 self._retreat(index) 3976 return None 3977 3978 if unpivot: 3979 expressions = self._parse_csv(self._parse_column) 3980 else: 3981 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3982 3983 if not expressions: 3984 self.raise_error("Failed to parse PIVOT's aggregation list") 3985 3986 if not self._match(TokenType.FOR): 3987 self.raise_error("Expecting FOR") 3988 3989 field = self._parse_pivot_in() 3990 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3991 self._parse_bitwise 3992 ) 3993 3994 self._match_r_paren() 3995 3996 pivot = self.expression( 3997 exp.Pivot, 3998 expressions=expressions, 3999 field=field, 4000 unpivot=unpivot, 4001 include_nulls=include_nulls, 4002 default_on_null=default_on_null, 4003 ) 4004 4005 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4006 pivot.set("alias", self._parse_table_alias()) 4007 4008 if not unpivot: 4009 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4010 4011 columns: t.List[exp.Expression] = [] 4012 for fld in pivot.args["field"].expressions: 4013 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4014 for name in names: 4015 if self.PREFIXED_PIVOT_COLUMNS: 4016 name = f"{name}_{field_name}" if name else field_name 4017 else: 4018 name = f"{field_name}_{name}" if name else field_name 4019 4020 columns.append(exp.to_identifier(name)) 4021 4022 pivot.set("columns", columns) 4023 4024 return pivot 4025 4026 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4027 return [agg.alias for agg in aggregations] 4028 4029 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4030 if not skip_where_token and not self._match(TokenType.PREWHERE): 4031 return None 4032 4033 return self.expression( 4034 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4035 ) 4036 4037 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4038 if not skip_where_token and not self._match(TokenType.WHERE): 4039 return None 4040 4041 return self.expression( 4042 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4043 ) 4044 4045 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4046 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4047 return None 4048 4049 elements: t.Dict[str, t.Any] = defaultdict(list) 4050 4051 if self._match(TokenType.ALL): 4052 elements["all"] = True 4053 elif self._match(TokenType.DISTINCT): 4054 elements["all"] = False 4055 4056 while True: 4057 index = self._index 4058 4059 elements["expressions"].extend( 4060 self._parse_csv( 4061 lambda: None 4062 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4063 else self._parse_assignment() 4064 ) 4065 ) 4066 4067 before_with_index = self._index 4068 with_prefix = self._match(TokenType.WITH) 4069 4070 if self._match(TokenType.ROLLUP): 4071 elements["rollup"].append( 4072 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4073 ) 4074 elif self._match(TokenType.CUBE): 4075 elements["cube"].append( 4076 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4077 ) 4078 elif self._match(TokenType.GROUPING_SETS): 4079 
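# Example (hedged sketch, not part of the parser source): ROLLUP, CUBE and
# GROUPING SETS are collected into separate lists on exp.Group, so plain keys can
# be mixed freely with any number of these constructs. Assuming a reasonably
# recent sqlglot:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one(
#     ...     "SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a, b), (a), ())"
#     ... )
#     >>> q.args["group"].args["grouping_sets"]  # roughly [GroupingSets(expressions=[...])]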
elements["grouping_sets"].append( 4080 self.expression( 4081 exp.GroupingSets, 4082 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4083 ) 4084 ) 4085 elif self._match_text_seq("TOTALS"): 4086 elements["totals"] = True # type: ignore 4087 4088 if before_with_index <= self._index <= before_with_index + 1: 4089 self._retreat(before_with_index) 4090 break 4091 4092 if index == self._index: 4093 break 4094 4095 return self.expression(exp.Group, **elements) # type: ignore 4096 4097 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4098 return self.expression( 4099 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4100 ) 4101 4102 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4103 if self._match(TokenType.L_PAREN): 4104 grouping_set = self._parse_csv(self._parse_column) 4105 self._match_r_paren() 4106 return self.expression(exp.Tuple, expressions=grouping_set) 4107 4108 return self._parse_column() 4109 4110 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4111 if not skip_having_token and not self._match(TokenType.HAVING): 4112 return None 4113 return self.expression(exp.Having, this=self._parse_assignment()) 4114 4115 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4116 if not self._match(TokenType.QUALIFY): 4117 return None 4118 return self.expression(exp.Qualify, this=self._parse_assignment()) 4119 4120 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4121 if skip_start_token: 4122 start = None 4123 elif self._match(TokenType.START_WITH): 4124 start = self._parse_assignment() 4125 else: 4126 return None 4127 4128 self._match(TokenType.CONNECT_BY) 4129 nocycle = self._match_text_seq("NOCYCLE") 4130 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4131 exp.Prior, this=self._parse_bitwise() 4132 ) 4133 connect = self._parse_assignment() 4134 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4135 4136 if not start and self._match(TokenType.START_WITH): 4137 start = self._parse_assignment() 4138 4139 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4140 4141 def _parse_name_as_expression(self) -> exp.Alias: 4142 return self.expression( 4143 exp.Alias, 4144 alias=self._parse_id_var(any_token=True), 4145 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4146 ) 4147 4148 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4149 if self._match_text_seq("INTERPOLATE"): 4150 return self._parse_wrapped_csv(self._parse_name_as_expression) 4151 return None 4152 4153 def _parse_order( 4154 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4155 ) -> t.Optional[exp.Expression]: 4156 siblings = None 4157 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4158 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4159 return this 4160 4161 siblings = True 4162 4163 return self.expression( 4164 exp.Order, 4165 this=this, 4166 expressions=self._parse_csv(self._parse_ordered), 4167 siblings=siblings, 4168 ) 4169 4170 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4171 if not self._match(token): 4172 return None 4173 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4174 4175 def _parse_ordered( 4176 self, parse_method: t.Optional[t.Callable] = None 4177 ) -> t.Optional[exp.Ordered]: 4178 this = parse_method() if parse_method else self._parse_assignment() 4179 if 
not this: 4180 return None 4181 4182 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4183 this = exp.var("ALL") 4184 4185 asc = self._match(TokenType.ASC) 4186 desc = self._match(TokenType.DESC) or (asc and False) 4187 4188 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4189 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4190 4191 nulls_first = is_nulls_first or False 4192 explicitly_null_ordered = is_nulls_first or is_nulls_last 4193 4194 if ( 4195 not explicitly_null_ordered 4196 and ( 4197 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4198 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4199 ) 4200 and self.dialect.NULL_ORDERING != "nulls_are_last" 4201 ): 4202 nulls_first = True 4203 4204 if self._match_text_seq("WITH", "FILL"): 4205 with_fill = self.expression( 4206 exp.WithFill, 4207 **{ # type: ignore 4208 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4209 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4210 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4211 "interpolate": self._parse_interpolate(), 4212 }, 4213 ) 4214 else: 4215 with_fill = None 4216 4217 return self.expression( 4218 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4219 ) 4220 4221 def _parse_limit( 4222 self, 4223 this: t.Optional[exp.Expression] = None, 4224 top: bool = False, 4225 skip_limit_token: bool = False, 4226 ) -> t.Optional[exp.Expression]: 4227 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4228 comments = self._prev_comments 4229 if top: 4230 limit_paren = self._match(TokenType.L_PAREN) 4231 expression = self._parse_term() if limit_paren else self._parse_number() 4232 4233 if limit_paren: 4234 self._match_r_paren() 4235 else: 4236 expression = self._parse_term() 4237 4238 if self._match(TokenType.COMMA): 4239 offset = expression 4240 expression = self._parse_term() 4241 else: 4242 offset = None 4243 4244 limit_exp = self.expression( 4245 exp.Limit, 4246 this=this, 4247 expression=expression, 4248 offset=offset, 4249 comments=comments, 4250 expressions=self._parse_limit_by(), 4251 ) 4252 4253 return limit_exp 4254 4255 if self._match(TokenType.FETCH): 4256 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4257 direction = self._prev.text.upper() if direction else "FIRST" 4258 4259 count = self._parse_field(tokens=self.FETCH_TOKENS) 4260 percent = self._match(TokenType.PERCENT) 4261 4262 self._match_set((TokenType.ROW, TokenType.ROWS)) 4263 4264 only = self._match_text_seq("ONLY") 4265 with_ties = self._match_text_seq("WITH", "TIES") 4266 4267 if only and with_ties: 4268 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4269 4270 return self.expression( 4271 exp.Fetch, 4272 direction=direction, 4273 count=count, 4274 percent=percent, 4275 with_ties=with_ties, 4276 ) 4277 4278 return this 4279 4280 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4281 if not self._match(TokenType.OFFSET): 4282 return this 4283 4284 count = self._parse_term() 4285 self._match_set((TokenType.ROW, TokenType.ROWS)) 4286 4287 return self.expression( 4288 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4289 ) 4290 4291 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4292 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4293 4294 def _parse_locks(self) -> t.List[exp.Lock]: 4295 locks = [] 4296 while 
True: 4297 if self._match_text_seq("FOR", "UPDATE"): 4298 update = True 4299 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4300 "LOCK", "IN", "SHARE", "MODE" 4301 ): 4302 update = False 4303 else: 4304 break 4305 4306 expressions = None 4307 if self._match_text_seq("OF"): 4308 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4309 4310 wait: t.Optional[bool | exp.Expression] = None 4311 if self._match_text_seq("NOWAIT"): 4312 wait = True 4313 elif self._match_text_seq("WAIT"): 4314 wait = self._parse_primary() 4315 elif self._match_text_seq("SKIP", "LOCKED"): 4316 wait = False 4317 4318 locks.append( 4319 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4320 ) 4321 4322 return locks 4323 4324 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4325 while this and self._match_set(self.SET_OPERATIONS): 4326 token_type = self._prev.token_type 4327 4328 if token_type == TokenType.UNION: 4329 operation: t.Type[exp.SetOperation] = exp.Union 4330 elif token_type == TokenType.EXCEPT: 4331 operation = exp.Except 4332 else: 4333 operation = exp.Intersect 4334 4335 comments = self._prev.comments 4336 4337 if self._match(TokenType.DISTINCT): 4338 distinct: t.Optional[bool] = True 4339 elif self._match(TokenType.ALL): 4340 distinct = False 4341 else: 4342 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4343 if distinct is None: 4344 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4345 4346 by_name = self._match_text_seq("BY", "NAME") 4347 expression = self._parse_select(nested=True, parse_set_operation=False) 4348 4349 this = self.expression( 4350 operation, 4351 comments=comments, 4352 this=this, 4353 distinct=distinct, 4354 by_name=by_name, 4355 expression=expression, 4356 ) 4357 4358 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4359 expression = this.expression 4360 4361 if expression: 4362 for arg in self.SET_OP_MODIFIERS: 4363 expr = expression.args.get(arg) 4364 if expr: 4365 this.set(arg, expr.pop()) 4366 4367 return this 4368 4369 def _parse_expression(self) -> t.Optional[exp.Expression]: 4370 return self._parse_alias(self._parse_assignment()) 4371 4372 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4373 this = self._parse_disjunction() 4374 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4375 # This allows us to parse <non-identifier token> := <expr> 4376 this = exp.column( 4377 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4378 ) 4379 4380 while self._match_set(self.ASSIGNMENT): 4381 if isinstance(this, exp.Column) and len(this.parts) == 1: 4382 this = this.this 4383 4384 this = self.expression( 4385 self.ASSIGNMENT[self._prev.token_type], 4386 this=this, 4387 comments=self._prev_comments, 4388 expression=self._parse_assignment(), 4389 ) 4390 4391 return this 4392 4393 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4394 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4395 4396 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4397 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4398 4399 def _parse_equality(self) -> t.Optional[exp.Expression]: 4400 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4401 4402 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4403 return self._parse_tokens(self._parse_range, self.COMPARISON) 4404 4405 def _parse_range(self, this: 
t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4406 this = this or self._parse_bitwise() 4407 negate = self._match(TokenType.NOT) 4408 4409 if self._match_set(self.RANGE_PARSERS): 4410 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4411 if not expression: 4412 return this 4413 4414 this = expression 4415 elif self._match(TokenType.ISNULL): 4416 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4417 4418 # Postgres supports ISNULL and NOTNULL for conditions. 4419 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4420 if self._match(TokenType.NOTNULL): 4421 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4422 this = self.expression(exp.Not, this=this) 4423 4424 if negate: 4425 this = self._negate_range(this) 4426 4427 if self._match(TokenType.IS): 4428 this = self._parse_is(this) 4429 4430 return this 4431 4432 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4433 if not this: 4434 return this 4435 4436 return self.expression(exp.Not, this=this) 4437 4438 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4439 index = self._index - 1 4440 negate = self._match(TokenType.NOT) 4441 4442 if self._match_text_seq("DISTINCT", "FROM"): 4443 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4444 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4445 4446 if self._match(TokenType.JSON): 4447 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4448 4449 if self._match_text_seq("WITH"): 4450 _with = True 4451 elif self._match_text_seq("WITHOUT"): 4452 _with = False 4453 else: 4454 _with = None 4455 4456 unique = self._match(TokenType.UNIQUE) 4457 self._match_text_seq("KEYS") 4458 expression: t.Optional[exp.Expression] = self.expression( 4459 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4460 ) 4461 else: 4462 expression = self._parse_primary() or self._parse_null() 4463 if not expression: 4464 self._retreat(index) 4465 return None 4466 4467 this = self.expression(exp.Is, this=this, expression=expression) 4468 return self.expression(exp.Not, this=this) if negate else this 4469 4470 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4471 unnest = self._parse_unnest(with_alias=False) 4472 if unnest: 4473 this = self.expression(exp.In, this=this, unnest=unnest) 4474 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4475 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4476 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4477 4478 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4479 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4480 else: 4481 this = self.expression(exp.In, this=this, expressions=expressions) 4482 4483 if matched_l_paren: 4484 self._match_r_paren(this) 4485 elif not self._match(TokenType.R_BRACKET, expression=this): 4486 self.raise_error("Expecting ]") 4487 else: 4488 this = self.expression(exp.In, this=this, field=self._parse_column()) 4489 4490 return this 4491 4492 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4493 low = self._parse_bitwise() 4494 self._match(TokenType.AND) 4495 high = self._parse_bitwise() 4496 return self.expression(exp.Between, this=this, low=low, high=high) 4497 4498 def _parse_escape(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 4499 if not self._match(TokenType.ESCAPE): 4500 return this 4501 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4502 4503 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4504 index = self._index 4505 4506 if not self._match(TokenType.INTERVAL) and match_interval: 4507 return None 4508 4509 if self._match(TokenType.STRING, advance=False): 4510 this = self._parse_primary() 4511 else: 4512 this = self._parse_term() 4513 4514 if not this or ( 4515 isinstance(this, exp.Column) 4516 and not this.table 4517 and not this.this.quoted 4518 and this.name.upper() == "IS" 4519 ): 4520 self._retreat(index) 4521 return None 4522 4523 unit = self._parse_function() or ( 4524 not self._match(TokenType.ALIAS, advance=False) 4525 and self._parse_var(any_token=True, upper=True) 4526 ) 4527 4528 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4529 # each INTERVAL expression into this canonical form so it's easy to transpile 4530 if this and this.is_number: 4531 this = exp.Literal.string(this.to_py()) 4532 elif this and this.is_string: 4533 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4534 if len(parts) == 1: 4535 if unit: 4536 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4537 self._retreat(self._index - 1) 4538 4539 this = exp.Literal.string(parts[0][0]) 4540 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4541 4542 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4543 unit = self.expression( 4544 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4545 ) 4546 4547 interval = self.expression(exp.Interval, this=this, unit=unit) 4548 4549 index = self._index 4550 self._match(TokenType.PLUS) 4551 4552 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4553 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4554 return self.expression( 4555 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4556 ) 4557 4558 self._retreat(index) 4559 return interval 4560 4561 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4562 this = self._parse_term() 4563 4564 while True: 4565 if self._match_set(self.BITWISE): 4566 this = self.expression( 4567 self.BITWISE[self._prev.token_type], 4568 this=this, 4569 expression=self._parse_term(), 4570 ) 4571 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4572 this = self.expression( 4573 exp.DPipe, 4574 this=this, 4575 expression=self._parse_term(), 4576 safe=not self.dialect.STRICT_STRING_CONCAT, 4577 ) 4578 elif self._match(TokenType.DQMARK): 4579 this = self.expression( 4580 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4581 ) 4582 elif self._match_pair(TokenType.LT, TokenType.LT): 4583 this = self.expression( 4584 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4585 ) 4586 elif self._match_pair(TokenType.GT, TokenType.GT): 4587 this = self.expression( 4588 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4589 ) 4590 else: 4591 break 4592 4593 return this 4594 4595 def _parse_term(self) -> t.Optional[exp.Expression]: 4596 this = self._parse_factor() 4597 4598 while self._match_set(self.TERM): 4599 klass = self.TERM[self._prev.token_type] 4600 comments = self._prev_comments 4601 expression = self._parse_factor() 4602 4603 this = self.expression(klass, this=this, comments=comments, expression=expression) 4604 4605 if isinstance(this, exp.Collate): 4606 expr = this.expression 4607 4608 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4609 # fallback to Identifier / Var 4610 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4611 ident = expr.this 4612 if isinstance(ident, exp.Identifier): 4613 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4614 4615 return this 4616 4617 def _parse_factor(self) -> t.Optional[exp.Expression]: 4618 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4619 this = parse_method() 4620 4621 while self._match_set(self.FACTOR): 4622 klass = self.FACTOR[self._prev.token_type] 4623 comments = self._prev_comments 4624 expression = parse_method() 4625 4626 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4627 self._retreat(self._index - 1) 4628 return this 4629 4630 this = self.expression(klass, this=this, comments=comments, expression=expression) 4631 4632 if isinstance(this, exp.Div): 4633 this.args["typed"] = self.dialect.TYPED_DIVISION 4634 this.args["safe"] = self.dialect.SAFE_DIVISION 4635 4636 return this 4637 4638 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4639 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4640 4641 def _parse_unary(self) -> t.Optional[exp.Expression]: 4642 if self._match_set(self.UNARY_PARSERS): 4643 return self.UNARY_PARSERS[self._prev.token_type](self) 4644 return self._parse_at_time_zone(self._parse_type()) 4645 4646 def _parse_type( 4647 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4648 ) -> t.Optional[exp.Expression]: 4649 interval = parse_interval and self._parse_interval() 4650 if interval: 4651 return interval 4652 4653 index = self._index 4654 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4655 
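# Example (illustrative sketch, not part of the parser source): the interval
# branch above canonicalizes INTERVAL literals into the INTERVAL '<value>' <UNIT>
# form and folds chained literals into a sum of intervals, which keeps them easy
# to transpile. Assuming a reasonably recent sqlglot:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()
#     "SELECT INTERVAL '5' DAY"
#     >>> sqlglot.parse_one("SELECT INTERVAL '1' DAY '2' HOUR").find(sqlglot.exp.Add)  # sum of intervals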
4656 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4657 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4658 if isinstance(data_type, exp.Cast): 4659 # This constructor can contain ops directly after it, for instance struct unnesting: 4660 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4661 return self._parse_column_ops(data_type) 4662 4663 if data_type: 4664 index2 = self._index 4665 this = self._parse_primary() 4666 4667 if isinstance(this, exp.Literal): 4668 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4669 if parser: 4670 return parser(self, this, data_type) 4671 4672 return self.expression(exp.Cast, this=this, to=data_type) 4673 4674 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4675 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4676 # 4677 # If the index difference here is greater than 1, that means the parser itself must have 4678 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4679 # 4680 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4681 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4682 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4683 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4684 # 4685 # In these cases, we don't really want to return the converted type, but instead retreat 4686 # and try to parse a Column or Identifier in the section below. 4687 if data_type.expressions and index2 - index > 1: 4688 self._retreat(index2) 4689 return self._parse_column_ops(data_type) 4690 4691 self._retreat(index) 4692 4693 if fallback_to_identifier: 4694 return self._parse_id_var() 4695 4696 this = self._parse_column() 4697 return this and self._parse_column_ops(this) 4698 4699 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4700 this = self._parse_type() 4701 if not this: 4702 return None 4703 4704 if isinstance(this, exp.Column) and not this.table: 4705 this = exp.var(this.name.upper()) 4706 4707 return self.expression( 4708 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4709 ) 4710 4711 def _parse_types( 4712 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4713 ) -> t.Optional[exp.Expression]: 4714 index = self._index 4715 4716 this: t.Optional[exp.Expression] = None 4717 prefix = self._match_text_seq("SYSUDTLIB", ".") 4718 4719 if not self._match_set(self.TYPE_TOKENS): 4720 identifier = allow_identifiers and self._parse_id_var( 4721 any_token=False, tokens=(TokenType.VAR,) 4722 ) 4723 if isinstance(identifier, exp.Identifier): 4724 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4725 4726 if len(tokens) != 1: 4727 self.raise_error("Unexpected identifier", self._prev) 4728 4729 if tokens[0].token_type in self.TYPE_TOKENS: 4730 self._prev = tokens[0] 4731 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4732 type_name = identifier.name 4733 4734 while self._match(TokenType.DOT): 4735 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4736 4737 this = exp.DataType.build(type_name, udt=True) 4738 else: 4739 self._retreat(self._index - 1) 4740 return None 4741 else: 4742 return None 4743 4744 type_token = self._prev.token_type 4745 4746 if type_token == TokenType.PSEUDO_TYPE: 
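# Example (hedged sketch, not part of the parser source): _parse_types is also
# reachable through the public exp.DataType.build helper, which is a convenient
# way to inspect how nested types come out. Assuming a reasonably recent sqlglot:
#
#     >>> from sqlglot import exp
#     >>> dt = exp.DataType.build("ARRAY<INT>")
#     >>> dt.this, dt.args.get("nested"), [e.sql() for e in dt.expressions]
#     # roughly (Type.ARRAY, True, ['INT'])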
4747 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4748 4749 if type_token == TokenType.OBJECT_IDENTIFIER: 4750 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4751 4752 # https://materialize.com/docs/sql/types/map/ 4753 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4754 key_type = self._parse_types( 4755 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4756 ) 4757 if not self._match(TokenType.FARROW): 4758 self._retreat(index) 4759 return None 4760 4761 value_type = self._parse_types( 4762 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4763 ) 4764 if not self._match(TokenType.R_BRACKET): 4765 self._retreat(index) 4766 return None 4767 4768 return exp.DataType( 4769 this=exp.DataType.Type.MAP, 4770 expressions=[key_type, value_type], 4771 nested=True, 4772 prefix=prefix, 4773 ) 4774 4775 nested = type_token in self.NESTED_TYPE_TOKENS 4776 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4777 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4778 expressions = None 4779 maybe_func = False 4780 4781 if self._match(TokenType.L_PAREN): 4782 if is_struct: 4783 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4784 elif nested: 4785 expressions = self._parse_csv( 4786 lambda: self._parse_types( 4787 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4788 ) 4789 ) 4790 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4791 this = expressions[0] 4792 this.set("nullable", True) 4793 self._match_r_paren() 4794 return this 4795 elif type_token in self.ENUM_TYPE_TOKENS: 4796 expressions = self._parse_csv(self._parse_equality) 4797 elif is_aggregate: 4798 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4799 any_token=False, tokens=(TokenType.VAR,) 4800 ) 4801 if not func_or_ident or not self._match(TokenType.COMMA): 4802 return None 4803 expressions = self._parse_csv( 4804 lambda: self._parse_types( 4805 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4806 ) 4807 ) 4808 expressions.insert(0, func_or_ident) 4809 else: 4810 expressions = self._parse_csv(self._parse_type_size) 4811 4812 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4813 if type_token == TokenType.VECTOR and len(expressions) == 2: 4814 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4815 4816 if not expressions or not self._match(TokenType.R_PAREN): 4817 self._retreat(index) 4818 return None 4819 4820 maybe_func = True 4821 4822 values: t.Optional[t.List[exp.Expression]] = None 4823 4824 if nested and self._match(TokenType.LT): 4825 if is_struct: 4826 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4827 else: 4828 expressions = self._parse_csv( 4829 lambda: self._parse_types( 4830 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4831 ) 4832 ) 4833 4834 if not self._match(TokenType.GT): 4835 self.raise_error("Expecting >") 4836 4837 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4838 values = self._parse_csv(self._parse_assignment) 4839 if not values and is_struct: 4840 values = None 4841 self._retreat(self._index - 1) 4842 else: 4843 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4844 4845 if type_token in self.TIMESTAMPS: 4846 if self._match_text_seq("WITH", "TIME", "ZONE"): 4847 maybe_func = False 4848 tz_type = ( 4849 exp.DataType.Type.TIMETZ 4850 if 
type_token in self.TIMES 4851 else exp.DataType.Type.TIMESTAMPTZ 4852 ) 4853 this = exp.DataType(this=tz_type, expressions=expressions) 4854 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4855 maybe_func = False 4856 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4857 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4858 maybe_func = False 4859 elif type_token == TokenType.INTERVAL: 4860 unit = self._parse_var(upper=True) 4861 if unit: 4862 if self._match_text_seq("TO"): 4863 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4864 4865 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4866 else: 4867 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4868 4869 if maybe_func and check_func: 4870 index2 = self._index 4871 peek = self._parse_string() 4872 4873 if not peek: 4874 self._retreat(index) 4875 return None 4876 4877 self._retreat(index2) 4878 4879 if not this: 4880 if self._match_text_seq("UNSIGNED"): 4881 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4882 if not unsigned_type_token: 4883 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4884 4885 type_token = unsigned_type_token or type_token 4886 4887 this = exp.DataType( 4888 this=exp.DataType.Type[type_token.value], 4889 expressions=expressions, 4890 nested=nested, 4891 prefix=prefix, 4892 ) 4893 4894 # Empty arrays/structs are allowed 4895 if values is not None: 4896 cls = exp.Struct if is_struct else exp.Array 4897 this = exp.cast(cls(expressions=values), this, copy=False) 4898 4899 elif expressions: 4900 this.set("expressions", expressions) 4901 4902 # https://materialize.com/docs/sql/types/list/#type-name 4903 while self._match(TokenType.LIST): 4904 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4905 4906 index = self._index 4907 4908 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4909 matched_array = self._match(TokenType.ARRAY) 4910 4911 while self._curr: 4912 datatype_token = self._prev.token_type 4913 matched_l_bracket = self._match(TokenType.L_BRACKET) 4914 if not matched_l_bracket and not matched_array: 4915 break 4916 4917 matched_array = False 4918 values = self._parse_csv(self._parse_assignment) or None 4919 if ( 4920 values 4921 and not schema 4922 and ( 4923 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4924 ) 4925 ): 4926 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 4927 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4928 self._retreat(index) 4929 break 4930 4931 this = exp.DataType( 4932 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4933 ) 4934 self._match(TokenType.R_BRACKET) 4935 4936 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4937 converter = self.TYPE_CONVERTERS.get(this.this) 4938 if converter: 4939 this = converter(t.cast(exp.DataType, this)) 4940 4941 return this 4942 4943 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4944 index = self._index 4945 4946 if ( 4947 self._curr 4948 and self._next 4949 and self._curr.token_type in self.TYPE_TOKENS 4950 and self._next.token_type in self.TYPE_TOKENS 4951 ): 4952 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4953 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4954 this = self._parse_id_var() 4955 else: 4956 this = ( 4957 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4958 or self._parse_id_var() 4959 ) 4960 4961 self._match(TokenType.COLON) 4962 4963 if ( 4964 type_required 4965 and not isinstance(this, exp.DataType) 4966 and not self._match_set(self.TYPE_TOKENS, advance=False) 4967 ): 4968 self._retreat(index) 4969 return self._parse_types() 4970 4971 return self._parse_column_def(this) 4972 4973 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4974 if not self._match_text_seq("AT", "TIME", "ZONE"): 4975 return this 4976 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4977 4978 def _parse_column(self) -> t.Optional[exp.Expression]: 4979 this = self._parse_column_reference() 4980 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4981 4982 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4983 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4984 4985 return column 4986 4987 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4988 this = self._parse_field() 4989 if ( 4990 not this 4991 and self._match(TokenType.VALUES, advance=False) 4992 and self.VALUES_FOLLOWED_BY_PAREN 4993 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4994 ): 4995 this = self._parse_id_var() 4996 4997 if isinstance(this, exp.Identifier): 4998 # We bubble up comments from the Identifier to the Column 4999 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5000 5001 return this 5002 5003 def _parse_colon_as_variant_extract( 5004 self, this: t.Optional[exp.Expression] 5005 ) -> t.Optional[exp.Expression]: 5006 casts = [] 5007 json_path = [] 5008 escape = None 5009 5010 while self._match(TokenType.COLON): 5011 start_index = self._index 5012 5013 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5014 path = self._parse_column_ops( 5015 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5016 ) 5017 5018 # The cast :: operator has a lower precedence than the extraction operator :, so 5019 # we rearrange the AST appropriately to avoid casting the JSON path 5020 while isinstance(path, exp.Cast): 5021 casts.append(path.to) 5022 path = path.this 5023 5024 if casts: 5025 dcolon_offset = next( 5026 i 5027 for i, t in enumerate(self._tokens[start_index:]) 5028 if t.token_type == TokenType.DCOLON 
5029 ) 5030 end_token = self._tokens[start_index + dcolon_offset - 1] 5031 else: 5032 end_token = self._prev 5033 5034 if path: 5035 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5036 # it'll roundtrip to a string literal in GET_PATH 5037 if isinstance(path, exp.Identifier) and path.quoted: 5038 escape = True 5039 5040 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5041 5042 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5043 # Databricks transforms it back to the colon/dot notation 5044 if json_path: 5045 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5046 5047 if json_path_expr: 5048 json_path_expr.set("escape", escape) 5049 5050 this = self.expression( 5051 exp.JSONExtract, 5052 this=this, 5053 expression=json_path_expr, 5054 variant_extract=True, 5055 ) 5056 5057 while casts: 5058 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5059 5060 return this 5061 5062 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5063 return self._parse_types() 5064 5065 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5066 this = self._parse_bracket(this) 5067 5068 while self._match_set(self.COLUMN_OPERATORS): 5069 op_token = self._prev.token_type 5070 op = self.COLUMN_OPERATORS.get(op_token) 5071 5072 if op_token == TokenType.DCOLON: 5073 field = self._parse_dcolon() 5074 if not field: 5075 self.raise_error("Expected type") 5076 elif op and self._curr: 5077 field = self._parse_column_reference() or self._parse_bracket() 5078 else: 5079 field = self._parse_field(any_token=True, anonymous_func=True) 5080 5081 if isinstance(field, exp.Func) and this: 5082 # bigquery allows function calls like x.y.count(...) 5083 # SAFE.SUBSTR(...) 
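# Illustrative sketch of the rewrite below (tree shape only; added annotation): for
# x.y.count(1) the accumulated Column x.y is unrolled into nested Dot nodes, so the
# call ends up roughly as Dot(this=Dot(this=x, expression=y), expression=Count(this=1)).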
5084 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5085 this = exp.replace_tree( 5086 this, 5087 lambda n: ( 5088 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5089 if n.table 5090 else n.this 5091 ) 5092 if isinstance(n, exp.Column) 5093 else n, 5094 ) 5095 5096 if op: 5097 this = op(self, this, field) 5098 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5099 this = self.expression( 5100 exp.Column, 5101 comments=this.comments, 5102 this=field, 5103 table=this.this, 5104 db=this.args.get("table"), 5105 catalog=this.args.get("db"), 5106 ) 5107 else: 5108 this = self.expression(exp.Dot, this=this, expression=field) 5109 5110 this = self._parse_bracket(this) 5111 5112 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5113 5114 def _parse_primary(self) -> t.Optional[exp.Expression]: 5115 if self._match_set(self.PRIMARY_PARSERS): 5116 token_type = self._prev.token_type 5117 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5118 5119 if token_type == TokenType.STRING: 5120 expressions = [primary] 5121 while self._match(TokenType.STRING): 5122 expressions.append(exp.Literal.string(self._prev.text)) 5123 5124 if len(expressions) > 1: 5125 return self.expression(exp.Concat, expressions=expressions) 5126 5127 return primary 5128 5129 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5130 return exp.Literal.number(f"0.{self._prev.text}") 5131 5132 if self._match(TokenType.L_PAREN): 5133 comments = self._prev_comments 5134 query = self._parse_select() 5135 5136 if query: 5137 expressions = [query] 5138 else: 5139 expressions = self._parse_expressions() 5140 5141 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5142 5143 if not this and self._match(TokenType.R_PAREN, advance=False): 5144 this = self.expression(exp.Tuple) 5145 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5146 this = self._parse_subquery(this=this, parse_alias=False) 5147 elif isinstance(this, exp.Subquery): 5148 this = self._parse_subquery( 5149 this=self._parse_set_operations(this), parse_alias=False 5150 ) 5151 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5152 this = self.expression(exp.Tuple, expressions=expressions) 5153 else: 5154 this = self.expression(exp.Paren, this=this) 5155 5156 if this: 5157 this.add_comments(comments) 5158 5159 self._match_r_paren(expression=this) 5160 return this 5161 5162 return None 5163 5164 def _parse_field( 5165 self, 5166 any_token: bool = False, 5167 tokens: t.Optional[t.Collection[TokenType]] = None, 5168 anonymous_func: bool = False, 5169 ) -> t.Optional[exp.Expression]: 5170 if anonymous_func: 5171 field = ( 5172 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5173 or self._parse_primary() 5174 ) 5175 else: 5176 field = self._parse_primary() or self._parse_function( 5177 anonymous=anonymous_func, any_token=any_token 5178 ) 5179 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5180 5181 def _parse_function( 5182 self, 5183 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5184 anonymous: bool = False, 5185 optional_parens: bool = True, 5186 any_token: bool = False, 5187 ) -> t.Optional[exp.Expression]: 5188 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5189 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5190 fn_syntax = False 5191 if ( 5192 self._match(TokenType.L_BRACE, advance=False) 5193 and 
self._next 5194 and self._next.text.upper() == "FN" 5195 ): 5196 self._advance(2) 5197 fn_syntax = True 5198 5199 func = self._parse_function_call( 5200 functions=functions, 5201 anonymous=anonymous, 5202 optional_parens=optional_parens, 5203 any_token=any_token, 5204 ) 5205 5206 if fn_syntax: 5207 self._match(TokenType.R_BRACE) 5208 5209 return func 5210 5211 def _parse_function_call( 5212 self, 5213 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5214 anonymous: bool = False, 5215 optional_parens: bool = True, 5216 any_token: bool = False, 5217 ) -> t.Optional[exp.Expression]: 5218 if not self._curr: 5219 return None 5220 5221 comments = self._curr.comments 5222 token_type = self._curr.token_type 5223 this = self._curr.text 5224 upper = this.upper() 5225 5226 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5227 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5228 self._advance() 5229 return self._parse_window(parser(self)) 5230 5231 if not self._next or self._next.token_type != TokenType.L_PAREN: 5232 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5233 self._advance() 5234 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5235 5236 return None 5237 5238 if any_token: 5239 if token_type in self.RESERVED_TOKENS: 5240 return None 5241 elif token_type not in self.FUNC_TOKENS: 5242 return None 5243 5244 self._advance(2) 5245 5246 parser = self.FUNCTION_PARSERS.get(upper) 5247 if parser and not anonymous: 5248 this = parser(self) 5249 else: 5250 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5251 5252 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5253 this = self.expression( 5254 subquery_predicate, comments=comments, this=self._parse_select() 5255 ) 5256 self._match_r_paren() 5257 return this 5258 5259 if functions is None: 5260 functions = self.FUNCTIONS 5261 5262 function = functions.get(upper) 5263 5264 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5265 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5266 5267 if alias: 5268 args = self._kv_to_prop_eq(args) 5269 5270 if function and not anonymous: 5271 if "dialect" in function.__code__.co_varnames: 5272 func = function(args, dialect=self.dialect) 5273 else: 5274 func = function(args) 5275 5276 func = self.validate_expression(func, args) 5277 if not self.dialect.NORMALIZE_FUNCTIONS: 5278 func.meta["name"] = this 5279 5280 this = func 5281 else: 5282 if token_type == TokenType.IDENTIFIER: 5283 this = exp.Identifier(this=this, quoted=True) 5284 this = self.expression(exp.Anonymous, this=this, expressions=args) 5285 5286 if isinstance(this, exp.Expression): 5287 this.add_comments(comments) 5288 5289 self._match_r_paren(this) 5290 return self._parse_window(this) 5291 5292 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5293 return expression 5294 5295 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5296 transformed = [] 5297 5298 for index, e in enumerate(expressions): 5299 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5300 if isinstance(e, exp.Alias): 5301 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5302 5303 if not isinstance(e, exp.PropertyEQ): 5304 e = self.expression( 5305 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5306 ) 5307 5308 if isinstance(e.this, exp.Column): 5309 e.this.replace(e.this.this) 5310 else: 5311 e = self._to_prop_eq(e, index) 5312 5313 
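# Descriptive note: at this point e is either a key/value argument normalized to
# exp.PropertyEQ above, or whatever the dialect-specific _to_prop_eq hook returned
# for a positional argument.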
transformed.append(e) 5314 5315 return transformed 5316 5317 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5318 return self._parse_column_def(self._parse_id_var()) 5319 5320 def _parse_user_defined_function( 5321 self, kind: t.Optional[TokenType] = None 5322 ) -> t.Optional[exp.Expression]: 5323 this = self._parse_id_var() 5324 5325 while self._match(TokenType.DOT): 5326 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5327 5328 if not self._match(TokenType.L_PAREN): 5329 return this 5330 5331 expressions = self._parse_csv(self._parse_function_parameter) 5332 self._match_r_paren() 5333 return self.expression( 5334 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5335 ) 5336 5337 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5338 literal = self._parse_primary() 5339 if literal: 5340 return self.expression(exp.Introducer, this=token.text, expression=literal) 5341 5342 return self.expression(exp.Identifier, this=token.text) 5343 5344 def _parse_session_parameter(self) -> exp.SessionParameter: 5345 kind = None 5346 this = self._parse_id_var() or self._parse_primary() 5347 5348 if this and self._match(TokenType.DOT): 5349 kind = this.name 5350 this = self._parse_var() or self._parse_primary() 5351 5352 return self.expression(exp.SessionParameter, this=this, kind=kind) 5353 5354 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5355 return self._parse_id_var() 5356 5357 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5358 index = self._index 5359 5360 if self._match(TokenType.L_PAREN): 5361 expressions = t.cast( 5362 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5363 ) 5364 5365 if not self._match(TokenType.R_PAREN): 5366 self._retreat(index) 5367 else: 5368 expressions = [self._parse_lambda_arg()] 5369 5370 if self._match_set(self.LAMBDAS): 5371 return self.LAMBDAS[self._prev.token_type](self, expressions) 5372 5373 self._retreat(index) 5374 5375 this: t.Optional[exp.Expression] 5376 5377 if self._match(TokenType.DISTINCT): 5378 this = self.expression( 5379 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5380 ) 5381 else: 5382 this = self._parse_select_or_expression(alias=alias) 5383 5384 return self._parse_limit( 5385 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5386 ) 5387 5388 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5389 index = self._index 5390 if not self._match(TokenType.L_PAREN): 5391 return this 5392 5393 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5394 # expr can be of both types 5395 if self._match_set(self.SELECT_START_TOKENS): 5396 self._retreat(index) 5397 return this 5398 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5399 self._match_r_paren() 5400 return self.expression(exp.Schema, this=this, expressions=args) 5401 5402 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5403 return self._parse_column_def(self._parse_field(any_token=True)) 5404 5405 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5406 # column defs are not really columns, they're identifiers 5407 if isinstance(this, exp.Column): 5408 this = this.this 5409 5410 kind = self._parse_types(schema=True) 5411 5412 if self._match_text_seq("FOR", "ORDINALITY"): 5413 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5414 5415 constraints: t.List[exp.Expression] = [] 5416 5417 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5418 ("ALIAS", "MATERIALIZED") 5419 ): 5420 persisted = self._prev.text.upper() == "MATERIALIZED" 5421 constraint_kind = exp.ComputedColumnConstraint( 5422 this=self._parse_assignment(), 5423 persisted=persisted or self._match_text_seq("PERSISTED"), 5424 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5425 ) 5426 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5427 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5428 self._match(TokenType.ALIAS) 5429 constraints.append( 5430 self.expression( 5431 exp.ColumnConstraint, 5432 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5433 ) 5434 ) 5435 5436 while True: 5437 constraint = self._parse_column_constraint() 5438 if not constraint: 5439 break 5440 constraints.append(constraint) 5441 5442 if not kind and not constraints: 5443 return this 5444 5445 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5446 5447 def _parse_auto_increment( 5448 self, 5449 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5450 start = None 5451 increment = None 5452 5453 if self._match(TokenType.L_PAREN, advance=False): 5454 args = self._parse_wrapped_csv(self._parse_bitwise) 5455 start = seq_get(args, 0) 5456 increment = seq_get(args, 1) 5457 elif self._match_text_seq("START"): 5458 start = self._parse_bitwise() 5459 self._match_text_seq("INCREMENT") 5460 increment = self._parse_bitwise() 5461 5462 if start and increment: 5463 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5464 5465 return exp.AutoIncrementColumnConstraint() 5466 5467 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5468 if not self._match_text_seq("REFRESH"): 5469 self._retreat(self._index - 1) 5470 return None 5471 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5472 5473 def _parse_compress(self) -> exp.CompressColumnConstraint: 5474 if self._match(TokenType.L_PAREN, advance=False): 5475 return self.expression( 5476 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5477 ) 5478 5479 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5480 5481 def _parse_generated_as_identity( 5482 self, 5483 ) -> ( 5484 exp.GeneratedAsIdentityColumnConstraint 5485 | exp.ComputedColumnConstraint 5486 | exp.GeneratedAsRowColumnConstraint 5487 ): 5488 if self._match_text_seq("BY", "DEFAULT"): 5489 on_null = 
self._match_pair(TokenType.ON, TokenType.NULL) 5490 this = self.expression( 5491 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5492 ) 5493 else: 5494 self._match_text_seq("ALWAYS") 5495 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5496 5497 self._match(TokenType.ALIAS) 5498 5499 if self._match_text_seq("ROW"): 5500 start = self._match_text_seq("START") 5501 if not start: 5502 self._match(TokenType.END) 5503 hidden = self._match_text_seq("HIDDEN") 5504 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5505 5506 identity = self._match_text_seq("IDENTITY") 5507 5508 if self._match(TokenType.L_PAREN): 5509 if self._match(TokenType.START_WITH): 5510 this.set("start", self._parse_bitwise()) 5511 if self._match_text_seq("INCREMENT", "BY"): 5512 this.set("increment", self._parse_bitwise()) 5513 if self._match_text_seq("MINVALUE"): 5514 this.set("minvalue", self._parse_bitwise()) 5515 if self._match_text_seq("MAXVALUE"): 5516 this.set("maxvalue", self._parse_bitwise()) 5517 5518 if self._match_text_seq("CYCLE"): 5519 this.set("cycle", True) 5520 elif self._match_text_seq("NO", "CYCLE"): 5521 this.set("cycle", False) 5522 5523 if not identity: 5524 this.set("expression", self._parse_range()) 5525 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5526 args = self._parse_csv(self._parse_bitwise) 5527 this.set("start", seq_get(args, 0)) 5528 this.set("increment", seq_get(args, 1)) 5529 5530 self._match_r_paren() 5531 5532 return this 5533 5534 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5535 self._match_text_seq("LENGTH") 5536 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5537 5538 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5539 if self._match_text_seq("NULL"): 5540 return self.expression(exp.NotNullColumnConstraint) 5541 if self._match_text_seq("CASESPECIFIC"): 5542 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5543 if self._match_text_seq("FOR", "REPLICATION"): 5544 return self.expression(exp.NotForReplicationColumnConstraint) 5545 5546 # Unconsume the `NOT` token 5547 self._retreat(self._index - 1) 5548 return None 5549 5550 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5551 if self._match(TokenType.CONSTRAINT): 5552 this = self._parse_id_var() 5553 else: 5554 this = None 5555 5556 if self._match_texts(self.CONSTRAINT_PARSERS): 5557 return self.expression( 5558 exp.ColumnConstraint, 5559 this=this, 5560 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5561 ) 5562 5563 return this 5564 5565 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5566 if not self._match(TokenType.CONSTRAINT): 5567 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5568 5569 return self.expression( 5570 exp.Constraint, 5571 this=self._parse_id_var(), 5572 expressions=self._parse_unnamed_constraints(), 5573 ) 5574 5575 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5576 constraints = [] 5577 while True: 5578 constraint = self._parse_unnamed_constraint() or self._parse_function() 5579 if not constraint: 5580 break 5581 constraints.append(constraint) 5582 5583 return constraints 5584 5585 def _parse_unnamed_constraint( 5586 self, constraints: t.Optional[t.Collection[str]] = None 5587 ) -> t.Optional[exp.Expression]: 5588 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5589 constraints or 
self.CONSTRAINT_PARSERS 5590 ): 5591 return None 5592 5593 constraint = self._prev.text.upper() 5594 if constraint not in self.CONSTRAINT_PARSERS: 5595 self.raise_error(f"No parser found for schema constraint {constraint}.") 5596 5597 return self.CONSTRAINT_PARSERS[constraint](self) 5598 5599 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5600 return self._parse_id_var(any_token=False) 5601 5602 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5603 self._match_text_seq("KEY") 5604 return self.expression( 5605 exp.UniqueColumnConstraint, 5606 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5607 this=self._parse_schema(self._parse_unique_key()), 5608 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5609 on_conflict=self._parse_on_conflict(), 5610 ) 5611 5612 def _parse_key_constraint_options(self) -> t.List[str]: 5613 options = [] 5614 while True: 5615 if not self._curr: 5616 break 5617 5618 if self._match(TokenType.ON): 5619 action = None 5620 on = self._advance_any() and self._prev.text 5621 5622 if self._match_text_seq("NO", "ACTION"): 5623 action = "NO ACTION" 5624 elif self._match_text_seq("CASCADE"): 5625 action = "CASCADE" 5626 elif self._match_text_seq("RESTRICT"): 5627 action = "RESTRICT" 5628 elif self._match_pair(TokenType.SET, TokenType.NULL): 5629 action = "SET NULL" 5630 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5631 action = "SET DEFAULT" 5632 else: 5633 self.raise_error("Invalid key constraint") 5634 5635 options.append(f"ON {on} {action}") 5636 else: 5637 var = self._parse_var_from_options( 5638 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5639 ) 5640 if not var: 5641 break 5642 options.append(var.name) 5643 5644 return options 5645 5646 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5647 if match and not self._match(TokenType.REFERENCES): 5648 return None 5649 5650 expressions = None 5651 this = self._parse_table(schema=True) 5652 options = self._parse_key_constraint_options() 5653 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5654 5655 def _parse_foreign_key(self) -> exp.ForeignKey: 5656 expressions = self._parse_wrapped_id_vars() 5657 reference = self._parse_references() 5658 options = {} 5659 5660 while self._match(TokenType.ON): 5661 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5662 self.raise_error("Expected DELETE or UPDATE") 5663 5664 kind = self._prev.text.lower() 5665 5666 if self._match_text_seq("NO", "ACTION"): 5667 action = "NO ACTION" 5668 elif self._match(TokenType.SET): 5669 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5670 action = "SET " + self._prev.text.upper() 5671 else: 5672 self._advance() 5673 action = self._prev.text.upper() 5674 5675 options[kind] = action 5676 5677 return self.expression( 5678 exp.ForeignKey, 5679 expressions=expressions, 5680 reference=reference, 5681 **options, # type: ignore 5682 ) 5683 5684 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5685 return self._parse_field() 5686 5687 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5688 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5689 self._retreat(self._index - 1) 5690 return None 5691 5692 id_vars = self._parse_wrapped_id_vars() 5693 return self.expression( 5694 exp.PeriodForSystemTimeConstraint, 5695 this=seq_get(id_vars, 0), 5696 expression=seq_get(id_vars, 1), 5697 ) 5698 5699 def _parse_primary_key( 5700 self, wrapped_optional: bool 
= False, in_props: bool = False 5701 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5702 desc = ( 5703 self._match_set((TokenType.ASC, TokenType.DESC)) 5704 and self._prev.token_type == TokenType.DESC 5705 ) 5706 5707 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5708 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5709 5710 expressions = self._parse_wrapped_csv( 5711 self._parse_primary_key_part, optional=wrapped_optional 5712 ) 5713 options = self._parse_key_constraint_options() 5714 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5715 5716 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5717 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5718 5719 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5720 """ 5721 Parses a datetime literal in ODBC format. We parse the literal into the corresponding 5722 expression type, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` expression, 5723 exactly as `DATE('yyyy-mm-dd')` would be. 5724 5725 Reference: 5726 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5727 """ 5728 self._match(TokenType.VAR) 5729 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5730 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5731 if not self._match(TokenType.R_BRACE): 5732 self.raise_error("Expected }") 5733 return expression 5734 5735 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5736 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5737 return this 5738 5739 bracket_kind = self._prev.token_type 5740 if ( 5741 bracket_kind == TokenType.L_BRACE 5742 and self._curr 5743 and self._curr.token_type == TokenType.VAR 5744 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5745 ): 5746 return self._parse_odbc_datetime_literal() 5747 5748 expressions = self._parse_csv( 5749 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5750 ) 5751 5752 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5753 self.raise_error("Expected ]") 5754 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5755 self.raise_error("Expected }") 5756 5757 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5758 if bracket_kind == TokenType.L_BRACE: 5759 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5760 elif not this: 5761 this = build_array_constructor( 5762 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5763 ) 5764 else: 5765 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5766 if constructor_type: 5767 return build_array_constructor( 5768 constructor_type, 5769 args=expressions, 5770 bracket_kind=bracket_kind, 5771 dialect=self.dialect, 5772 ) 5773 5774 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5775 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5776 5777 self._add_comments(this) 5778 return self._parse_bracket(this) 5779 5780 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5781 if self._match(TokenType.COLON): 5782 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5783 return this 5784 5785 def _parse_case(self) -> t.Optional[exp.Expression]: 5786 ifs
= [] 5787 default = None 5788 5789 comments = self._prev_comments 5790 expression = self._parse_assignment() 5791 5792 while self._match(TokenType.WHEN): 5793 this = self._parse_assignment() 5794 self._match(TokenType.THEN) 5795 then = self._parse_assignment() 5796 ifs.append(self.expression(exp.If, this=this, true=then)) 5797 5798 if self._match(TokenType.ELSE): 5799 default = self._parse_assignment() 5800 5801 if not self._match(TokenType.END): 5802 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5803 default = exp.column("interval") 5804 else: 5805 self.raise_error("Expected END after CASE", self._prev) 5806 5807 return self.expression( 5808 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5809 ) 5810 5811 def _parse_if(self) -> t.Optional[exp.Expression]: 5812 if self._match(TokenType.L_PAREN): 5813 args = self._parse_csv(self._parse_assignment) 5814 this = self.validate_expression(exp.If.from_arg_list(args), args) 5815 self._match_r_paren() 5816 else: 5817 index = self._index - 1 5818 5819 if self.NO_PAREN_IF_COMMANDS and index == 0: 5820 return self._parse_as_command(self._prev) 5821 5822 condition = self._parse_assignment() 5823 5824 if not condition: 5825 self._retreat(index) 5826 return None 5827 5828 self._match(TokenType.THEN) 5829 true = self._parse_assignment() 5830 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5831 self._match(TokenType.END) 5832 this = self.expression(exp.If, this=condition, true=true, false=false) 5833 5834 return this 5835 5836 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5837 if not self._match_text_seq("VALUE", "FOR"): 5838 self._retreat(self._index - 1) 5839 return None 5840 5841 return self.expression( 5842 exp.NextValueFor, 5843 this=self._parse_column(), 5844 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5845 ) 5846 5847 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5848 this = self._parse_function() or self._parse_var_or_string(upper=True) 5849 5850 if self._match(TokenType.FROM): 5851 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5852 5853 if not self._match(TokenType.COMMA): 5854 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5855 5856 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5857 5858 def _parse_gap_fill(self) -> exp.GapFill: 5859 self._match(TokenType.TABLE) 5860 this = self._parse_table() 5861 5862 self._match(TokenType.COMMA) 5863 args = [this, *self._parse_csv(self._parse_lambda)] 5864 5865 gap_fill = exp.GapFill.from_arg_list(args) 5866 return self.validate_expression(gap_fill, args) 5867 5868 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5869 this = self._parse_assignment() 5870 5871 if not self._match(TokenType.ALIAS): 5872 if self._match(TokenType.COMMA): 5873 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5874 5875 self.raise_error("Expected AS after CAST") 5876 5877 fmt = None 5878 to = self._parse_types() 5879 5880 if self._match(TokenType.FORMAT): 5881 fmt_string = self._parse_string() 5882 fmt = self._parse_at_time_zone(fmt_string) 5883 5884 if not to: 5885 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5886 if to.this in exp.DataType.TEMPORAL_TYPES: 5887 this = self.expression( 5888 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5889 this=this, 5890 format=exp.Literal.string( 5891 format_time( 5892 
fmt_string.this if fmt_string else "", 5893 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5894 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5895 ) 5896 ), 5897 safe=safe, 5898 ) 5899 5900 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5901 this.set("zone", fmt.args["zone"]) 5902 return this 5903 elif not to: 5904 self.raise_error("Expected TYPE after CAST") 5905 elif isinstance(to, exp.Identifier): 5906 to = exp.DataType.build(to.name, udt=True) 5907 elif to.this == exp.DataType.Type.CHAR: 5908 if self._match(TokenType.CHARACTER_SET): 5909 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5910 5911 return self.expression( 5912 exp.Cast if strict else exp.TryCast, 5913 this=this, 5914 to=to, 5915 format=fmt, 5916 safe=safe, 5917 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5918 ) 5919 5920 def _parse_string_agg(self) -> exp.Expression: 5921 if self._match(TokenType.DISTINCT): 5922 args: t.List[t.Optional[exp.Expression]] = [ 5923 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5924 ] 5925 if self._match(TokenType.COMMA): 5926 args.extend(self._parse_csv(self._parse_assignment)) 5927 else: 5928 args = self._parse_csv(self._parse_assignment) # type: ignore 5929 5930 index = self._index 5931 if not self._match(TokenType.R_PAREN) and args: 5932 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5933 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5934 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5935 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5936 5937 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5938 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5939 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5940 if not self._match_text_seq("WITHIN", "GROUP"): 5941 self._retreat(index) 5942 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5943 5944 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5945 order = self._parse_order(this=seq_get(args, 0)) 5946 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5947 5948 def _parse_convert( 5949 self, strict: bool, safe: t.Optional[bool] = None 5950 ) -> t.Optional[exp.Expression]: 5951 this = self._parse_bitwise() 5952 5953 if self._match(TokenType.USING): 5954 to: t.Optional[exp.Expression] = self.expression( 5955 exp.CharacterSet, this=self._parse_var() 5956 ) 5957 elif self._match(TokenType.COMMA): 5958 to = self._parse_types() 5959 else: 5960 to = None 5961 5962 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5963 5964 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5965 """ 5966 There are generally two variants of the DECODE function: 5967 5968 - DECODE(bin, charset) 5969 - DECODE(expression, search, result [, search, result] ... [, default]) 5970 5971 The second variant will always be parsed into a CASE expression. Note that NULL 5972 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5973 instead of relying on pattern matching. 
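For example (illustrative): DECODE(a, 1, 'one', 'other') is parsed as
CASE WHEN a = 1 THEN 'one' ELSE 'other' END, while DECODE(a, NULL, 'missing')
becomes CASE WHEN a IS NULL THEN 'missing' END.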
5974 """ 5975 args = self._parse_csv(self._parse_assignment) 5976 5977 if len(args) < 3: 5978 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5979 5980 expression, *expressions = args 5981 if not expression: 5982 return None 5983 5984 ifs = [] 5985 for search, result in zip(expressions[::2], expressions[1::2]): 5986 if not search or not result: 5987 return None 5988 5989 if isinstance(search, exp.Literal): 5990 ifs.append( 5991 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5992 ) 5993 elif isinstance(search, exp.Null): 5994 ifs.append( 5995 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5996 ) 5997 else: 5998 cond = exp.or_( 5999 exp.EQ(this=expression.copy(), expression=search), 6000 exp.and_( 6001 exp.Is(this=expression.copy(), expression=exp.Null()), 6002 exp.Is(this=search.copy(), expression=exp.Null()), 6003 copy=False, 6004 ), 6005 copy=False, 6006 ) 6007 ifs.append(exp.If(this=cond, true=result)) 6008 6009 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6010 6011 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6012 self._match_text_seq("KEY") 6013 key = self._parse_column() 6014 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6015 self._match_text_seq("VALUE") 6016 value = self._parse_bitwise() 6017 6018 if not key and not value: 6019 return None 6020 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6021 6022 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6023 if not this or not self._match_text_seq("FORMAT", "JSON"): 6024 return this 6025 6026 return self.expression(exp.FormatJson, this=this) 6027 6028 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6029 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6030 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6031 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6032 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6033 else: 6034 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6035 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6036 6037 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6038 6039 if not empty and not error and not null: 6040 return None 6041 6042 return self.expression( 6043 exp.OnCondition, 6044 empty=empty, 6045 error=error, 6046 null=null, 6047 ) 6048 6049 def _parse_on_handling( 6050 self, on: str, *values: str 6051 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6052 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6053 for value in values: 6054 if self._match_text_seq(value, "ON", on): 6055 return f"{value} ON {on}" 6056 6057 index = self._index 6058 if self._match(TokenType.DEFAULT): 6059 default_value = self._parse_bitwise() 6060 if self._match_text_seq("ON", on): 6061 return default_value 6062 6063 self._retreat(index) 6064 6065 return None 6066 6067 @t.overload 6068 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6069 6070 @t.overload 6071 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
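# Illustrative sketch (added annotation, shape only): JSON_OBJECT('a' VALUE 1 NULL ON NULL)
# would come back as exp.JSONObject with a single exp.JSONKeyValue expression and
# null_handling='NULL ON NULL'; with agg=True the same parse produces exp.JSONObjectAgg.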
6072 6073 def _parse_json_object(self, agg=False): 6074 star = self._parse_star() 6075 expressions = ( 6076 [star] 6077 if star 6078 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6079 ) 6080 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6081 6082 unique_keys = None 6083 if self._match_text_seq("WITH", "UNIQUE"): 6084 unique_keys = True 6085 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6086 unique_keys = False 6087 6088 self._match_text_seq("KEYS") 6089 6090 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6091 self._parse_type() 6092 ) 6093 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6094 6095 return self.expression( 6096 exp.JSONObjectAgg if agg else exp.JSONObject, 6097 expressions=expressions, 6098 null_handling=null_handling, 6099 unique_keys=unique_keys, 6100 return_type=return_type, 6101 encoding=encoding, 6102 ) 6103 6104 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6105 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6106 if not self._match_text_seq("NESTED"): 6107 this = self._parse_id_var() 6108 kind = self._parse_types(allow_identifiers=False) 6109 nested = None 6110 else: 6111 this = None 6112 kind = None 6113 nested = True 6114 6115 path = self._match_text_seq("PATH") and self._parse_string() 6116 nested_schema = nested and self._parse_json_schema() 6117 6118 return self.expression( 6119 exp.JSONColumnDef, 6120 this=this, 6121 kind=kind, 6122 path=path, 6123 nested_schema=nested_schema, 6124 ) 6125 6126 def _parse_json_schema(self) -> exp.JSONSchema: 6127 self._match_text_seq("COLUMNS") 6128 return self.expression( 6129 exp.JSONSchema, 6130 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6131 ) 6132 6133 def _parse_json_table(self) -> exp.JSONTable: 6134 this = self._parse_format_json(self._parse_bitwise()) 6135 path = self._match(TokenType.COMMA) and self._parse_string() 6136 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6137 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6138 schema = self._parse_json_schema() 6139 6140 return exp.JSONTable( 6141 this=this, 6142 schema=schema, 6143 path=path, 6144 error_handling=error_handling, 6145 empty_handling=empty_handling, 6146 ) 6147 6148 def _parse_match_against(self) -> exp.MatchAgainst: 6149 expressions = self._parse_csv(self._parse_column) 6150 6151 self._match_text_seq(")", "AGAINST", "(") 6152 6153 this = self._parse_string() 6154 6155 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6156 modifier = "IN NATURAL LANGUAGE MODE" 6157 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6158 modifier = f"{modifier} WITH QUERY EXPANSION" 6159 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6160 modifier = "IN BOOLEAN MODE" 6161 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6162 modifier = "WITH QUERY EXPANSION" 6163 else: 6164 modifier = None 6165 6166 return self.expression( 6167 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6168 ) 6169 6170 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6171 def _parse_open_json(self) -> exp.OpenJSON: 6172 this = self._parse_bitwise() 6173 path = self._match(TokenType.COMMA) and self._parse_string() 6174 6175 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6176 this = self._parse_field(any_token=True) 6177 kind = self._parse_types() 6178 path = 
self._parse_string() 6179 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6180 6181 return self.expression( 6182 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6183 ) 6184 6185 expressions = None 6186 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6187 self._match_l_paren() 6188 expressions = self._parse_csv(_parse_open_json_column_def) 6189 6190 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6191 6192 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6193 args = self._parse_csv(self._parse_bitwise) 6194 6195 if self._match(TokenType.IN): 6196 return self.expression( 6197 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6198 ) 6199 6200 if haystack_first: 6201 haystack = seq_get(args, 0) 6202 needle = seq_get(args, 1) 6203 else: 6204 needle = seq_get(args, 0) 6205 haystack = seq_get(args, 1) 6206 6207 return self.expression( 6208 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6209 ) 6210 6211 def _parse_predict(self) -> exp.Predict: 6212 self._match_text_seq("MODEL") 6213 this = self._parse_table() 6214 6215 self._match(TokenType.COMMA) 6216 self._match_text_seq("TABLE") 6217 6218 return self.expression( 6219 exp.Predict, 6220 this=this, 6221 expression=self._parse_table(), 6222 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6223 ) 6224 6225 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6226 args = self._parse_csv(self._parse_table) 6227 return exp.JoinHint(this=func_name.upper(), expressions=args) 6228 6229 def _parse_substring(self) -> exp.Substring: 6230 # Postgres supports the form: substring(string [from int] [for int]) 6231 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6232 6233 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6234 6235 if self._match(TokenType.FROM): 6236 args.append(self._parse_bitwise()) 6237 if self._match(TokenType.FOR): 6238 if len(args) == 1: 6239 args.append(exp.Literal.number(1)) 6240 args.append(self._parse_bitwise()) 6241 6242 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6243 6244 def _parse_trim(self) -> exp.Trim: 6245 # https://www.w3resource.com/sql/character-functions/trim.php 6246 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6247 6248 position = None 6249 collation = None 6250 expression = None 6251 6252 if self._match_texts(self.TRIM_TYPES): 6253 position = self._prev.text.upper() 6254 6255 this = self._parse_bitwise() 6256 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6257 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6258 expression = self._parse_bitwise() 6259 6260 if invert_order: 6261 this, expression = expression, this 6262 6263 if self._match(TokenType.COLLATE): 6264 collation = self._parse_bitwise() 6265 6266 return self.expression( 6267 exp.Trim, this=this, position=position, expression=expression, collation=collation 6268 ) 6269 6270 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6271 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6272 6273 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6274 return self._parse_window(self._parse_id_var(), alias=True) 6275 6276 def _parse_respect_or_ignore_nulls( 6277 self, this: t.Optional[exp.Expression] 6278 ) -> t.Optional[exp.Expression]: 6279 if self._match_text_seq("IGNORE", "NULLS"): 
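# Descriptive note: e.g. for FIRST_VALUE(x IGNORE NULLS), the expression parsed so far
# is wrapped in exp.IgnoreNulls here (exp.RespectNulls in the branch below).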
6280 return self.expression(exp.IgnoreNulls, this=this) 6281 if self._match_text_seq("RESPECT", "NULLS"): 6282 return self.expression(exp.RespectNulls, this=this) 6283 return this 6284 6285 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6286 if self._match(TokenType.HAVING): 6287 self._match_texts(("MAX", "MIN")) 6288 max = self._prev.text.upper() != "MIN" 6289 return self.expression( 6290 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6291 ) 6292 6293 return this 6294 6295 def _parse_window( 6296 self, this: t.Optional[exp.Expression], alias: bool = False 6297 ) -> t.Optional[exp.Expression]: 6298 func = this 6299 comments = func.comments if isinstance(func, exp.Expression) else None 6300 6301 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6302 self._match(TokenType.WHERE) 6303 this = self.expression( 6304 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6305 ) 6306 self._match_r_paren() 6307 6308 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6309 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6310 if self._match_text_seq("WITHIN", "GROUP"): 6311 order = self._parse_wrapped(self._parse_order) 6312 this = self.expression(exp.WithinGroup, this=this, expression=order) 6313 6314 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6315 # Some dialects choose to implement it and some do not. 6316 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6317 6318 # There is some code above in _parse_lambda that handles 6319 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6320 6321 # The code below handles 6322 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6323 6324 # Oracle allows both formats 6325 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6326 # and Snowflake chose to do the same for familiarity 6327 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6328 if isinstance(this, exp.AggFunc): 6329 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6330 6331 if ignore_respect and ignore_respect is not this: 6332 ignore_respect.replace(ignore_respect.this) 6333 this = self.expression(ignore_respect.__class__, this=this) 6334 6335 this = self._parse_respect_or_ignore_nulls(this) 6336 6337 # bigquery select from window x AS (partition by ...)
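# Descriptive note: with alias=True we are parsing a named window, e.g. the w in
# WINDOW w AS (PARTITION BY x), so there is no OVER keyword to record and over stays None.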
6338 if alias: 6339 over = None 6340 self._match(TokenType.ALIAS) 6341 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6342 return this 6343 else: 6344 over = self._prev.text.upper() 6345 6346 if comments and isinstance(func, exp.Expression): 6347 func.pop_comments() 6348 6349 if not self._match(TokenType.L_PAREN): 6350 return self.expression( 6351 exp.Window, 6352 comments=comments, 6353 this=this, 6354 alias=self._parse_id_var(False), 6355 over=over, 6356 ) 6357 6358 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6359 6360 first = self._match(TokenType.FIRST) 6361 if self._match_text_seq("LAST"): 6362 first = False 6363 6364 partition, order = self._parse_partition_and_order() 6365 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6366 6367 if kind: 6368 self._match(TokenType.BETWEEN) 6369 start = self._parse_window_spec() 6370 self._match(TokenType.AND) 6371 end = self._parse_window_spec() 6372 6373 spec = self.expression( 6374 exp.WindowSpec, 6375 kind=kind, 6376 start=start["value"], 6377 start_side=start["side"], 6378 end=end["value"], 6379 end_side=end["side"], 6380 ) 6381 else: 6382 spec = None 6383 6384 self._match_r_paren() 6385 6386 window = self.expression( 6387 exp.Window, 6388 comments=comments, 6389 this=this, 6390 partition_by=partition, 6391 order=order, 6392 spec=spec, 6393 alias=window_alias, 6394 over=over, 6395 first=first, 6396 ) 6397 6398 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6399 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6400 return self._parse_window(window, alias=alias) 6401 6402 return window 6403 6404 def _parse_partition_and_order( 6405 self, 6406 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6407 return self._parse_partition_by(), self._parse_order() 6408 6409 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6410 self._match(TokenType.BETWEEN) 6411 6412 return { 6413 "value": ( 6414 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6415 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6416 or self._parse_bitwise() 6417 ), 6418 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6419 } 6420 6421 def _parse_alias( 6422 self, this: t.Optional[exp.Expression], explicit: bool = False 6423 ) -> t.Optional[exp.Expression]: 6424 any_token = self._match(TokenType.ALIAS) 6425 comments = self._prev_comments or [] 6426 6427 if explicit and not any_token: 6428 return this 6429 6430 if self._match(TokenType.L_PAREN): 6431 aliases = self.expression( 6432 exp.Aliases, 6433 comments=comments, 6434 this=this, 6435 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6436 ) 6437 self._match_r_paren(aliases) 6438 return aliases 6439 6440 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6441 self.STRING_ALIASES and self._parse_string_as_identifier() 6442 ) 6443 6444 if alias: 6445 comments.extend(alias.pop_comments()) 6446 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6447 column = this.this 6448 6449 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6450 if not this.comments and column and column.comments: 6451 this.comments = column.pop_comments() 6452 6453 return this 6454 6455 def _parse_id_var( 6456 self, 6457 any_token: bool = True, 6458 tokens: t.Optional[t.Collection[TokenType]] = None, 6459 ) -> t.Optional[exp.Expression]: 6460 expression = self._parse_identifier() 6461 if 
not expression and ( 6462 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6463 ): 6464 quoted = self._prev.token_type == TokenType.STRING 6465 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6466 6467 return expression 6468 6469 def _parse_string(self) -> t.Optional[exp.Expression]: 6470 if self._match_set(self.STRING_PARSERS): 6471 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6472 return self._parse_placeholder() 6473 6474 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6475 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6476 6477 def _parse_number(self) -> t.Optional[exp.Expression]: 6478 if self._match_set(self.NUMERIC_PARSERS): 6479 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6480 return self._parse_placeholder() 6481 6482 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6483 if self._match(TokenType.IDENTIFIER): 6484 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6485 return self._parse_placeholder() 6486 6487 def _parse_var( 6488 self, 6489 any_token: bool = False, 6490 tokens: t.Optional[t.Collection[TokenType]] = None, 6491 upper: bool = False, 6492 ) -> t.Optional[exp.Expression]: 6493 if ( 6494 (any_token and self._advance_any()) 6495 or self._match(TokenType.VAR) 6496 or (self._match_set(tokens) if tokens else False) 6497 ): 6498 return self.expression( 6499 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6500 ) 6501 return self._parse_placeholder() 6502 6503 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6504 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6505 self._advance() 6506 return self._prev 6507 return None 6508 6509 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6510 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6511 6512 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6513 return self._parse_primary() or self._parse_var(any_token=True) 6514 6515 def _parse_null(self) -> t.Optional[exp.Expression]: 6516 if self._match_set(self.NULL_TOKENS): 6517 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6518 return self._parse_placeholder() 6519 6520 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6521 if self._match(TokenType.TRUE): 6522 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6523 if self._match(TokenType.FALSE): 6524 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6525 return self._parse_placeholder() 6526 6527 def _parse_star(self) -> t.Optional[exp.Expression]: 6528 if self._match(TokenType.STAR): 6529 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6530 return self._parse_placeholder() 6531 6532 def _parse_parameter(self) -> exp.Parameter: 6533 this = self._parse_identifier() or self._parse_primary_or_var() 6534 return self.expression(exp.Parameter, this=this) 6535 6536 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6537 if self._match_set(self.PLACEHOLDER_PARSERS): 6538 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6539 if placeholder: 6540 return placeholder 6541 self._advance(-1) 6542 return None 6543 6544 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6545 if not self._match_texts(keywords): 6546 return None 6547 if self._match(TokenType.L_PAREN, 
advance=False): 6548 return self._parse_wrapped_csv(self._parse_expression) 6549 6550 expression = self._parse_expression() 6551 return [expression] if expression else None 6552 6553 def _parse_csv( 6554 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6555 ) -> t.List[exp.Expression]: 6556 parse_result = parse_method() 6557 items = [parse_result] if parse_result is not None else [] 6558 6559 while self._match(sep): 6560 self._add_comments(parse_result) 6561 parse_result = parse_method() 6562 if parse_result is not None: 6563 items.append(parse_result) 6564 6565 return items 6566 6567 def _parse_tokens( 6568 self, parse_method: t.Callable, expressions: t.Dict 6569 ) -> t.Optional[exp.Expression]: 6570 this = parse_method() 6571 6572 while self._match_set(expressions): 6573 this = self.expression( 6574 expressions[self._prev.token_type], 6575 this=this, 6576 comments=self._prev_comments, 6577 expression=parse_method(), 6578 ) 6579 6580 return this 6581 6582 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6583 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6584 6585 def _parse_wrapped_csv( 6586 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6587 ) -> t.List[exp.Expression]: 6588 return self._parse_wrapped( 6589 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6590 ) 6591 6592 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6593 wrapped = self._match(TokenType.L_PAREN) 6594 if not wrapped and not optional: 6595 self.raise_error("Expecting (") 6596 parse_result = parse_method() 6597 if wrapped: 6598 self._match_r_paren() 6599 return parse_result 6600 6601 def _parse_expressions(self) -> t.List[exp.Expression]: 6602 return self._parse_csv(self._parse_expression) 6603 6604 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6605 return self._parse_select() or self._parse_set_operations( 6606 self._parse_expression() if alias else self._parse_assignment() 6607 ) 6608 6609 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6610 return self._parse_query_modifiers( 6611 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6612 ) 6613 6614 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6615 this = None 6616 if self._match_texts(self.TRANSACTION_KIND): 6617 this = self._prev.text 6618 6619 self._match_texts(("TRANSACTION", "WORK")) 6620 6621 modes = [] 6622 while True: 6623 mode = [] 6624 while self._match(TokenType.VAR): 6625 mode.append(self._prev.text) 6626 6627 if mode: 6628 modes.append(" ".join(mode)) 6629 if not self._match(TokenType.COMMA): 6630 break 6631 6632 return self.expression(exp.Transaction, this=this, modes=modes) 6633 6634 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6635 chain = None 6636 savepoint = None 6637 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6638 6639 self._match_texts(("TRANSACTION", "WORK")) 6640 6641 if self._match_text_seq("TO"): 6642 self._match_text_seq("SAVEPOINT") 6643 savepoint = self._parse_id_var() 6644 6645 if self._match(TokenType.AND): 6646 chain = not self._match_text_seq("NO") 6647 self._match_text_seq("CHAIN") 6648 6649 if is_rollback: 6650 return self.expression(exp.Rollback, savepoint=savepoint) 6651 6652 return self.expression(exp.Commit, chain=chain) 6653 6654 def _parse_refresh(self) -> exp.Refresh: 6655 self._match(TokenType.TABLE) 6656 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6657 6658 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6659 if not self._match_text_seq("ADD"): 6660 return None 6661 6662 self._match(TokenType.COLUMN) 6663 exists_column = self._parse_exists(not_=True) 6664 expression = self._parse_field_def() 6665 6666 if expression: 6667 expression.set("exists", exists_column) 6668 6669 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6670 if self._match_texts(("FIRST", "AFTER")): 6671 position = self._prev.text 6672 column_position = self.expression( 6673 exp.ColumnPosition, this=self._parse_column(), position=position 6674 ) 6675 expression.set("position", column_position) 6676 6677 return expression 6678 6679 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6680 drop = self._match(TokenType.DROP) and self._parse_drop() 6681 if drop and not isinstance(drop, exp.Command): 6682 drop.set("kind", drop.args.get("kind", "COLUMN")) 6683 return drop 6684 6685 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6686 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6687 return self.expression( 6688 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6689 ) 6690 6691 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6692 index = self._index - 1 6693 6694 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6695 return self._parse_csv( 6696 lambda: self.expression( 6697 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6698 ) 6699 ) 6700 6701 self._retreat(index) 6702 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6703 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6704 6705 if self._match_text_seq("ADD", "COLUMNS"): 6706 schema = self._parse_schema() 6707 if schema: 6708 return [schema] 6709 return [] 6710 6711 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6712 6713 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6714 if self._match_texts(self.ALTER_ALTER_PARSERS): 6715 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6716 6717 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6718 # keyword after ALTER we default to parsing this statement 6719 self._match(TokenType.COLUMN) 6720 column = self._parse_field(any_token=True) 6721 6722 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6723 return self.expression(exp.AlterColumn, this=column, drop=True) 6724 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6725 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6726 if self._match(TokenType.COMMENT): 6727 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6728 if self._match_text_seq("DROP", "NOT", "NULL"): 6729 return self.expression( 6730 exp.AlterColumn, 6731 this=column, 6732 drop=True, 6733 allow_null=True, 6734 ) 6735 if self._match_text_seq("SET", "NOT", "NULL"): 6736 return self.expression( 6737 exp.AlterColumn, 6738 this=column, 6739 allow_null=False, 6740 ) 6741 self._match_text_seq("SET", "DATA") 6742 self._match_text_seq("TYPE") 6743 return self.expression( 6744 exp.AlterColumn, 6745 this=column, 6746 dtype=self._parse_types(), 6747 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6748 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6749 ) 6750 6751 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6752 if self._match_texts(("ALL", "EVEN", "AUTO")): 6753 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6754 6755 self._match_text_seq("KEY", "DISTKEY") 6756 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6757 6758 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6759 if compound: 6760 self._match_text_seq("SORTKEY") 6761 6762 if self._match(TokenType.L_PAREN, advance=False): 6763 return self.expression( 6764 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6765 ) 6766 6767 self._match_texts(("AUTO", "NONE")) 6768 return self.expression( 6769 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6770 ) 6771 6772 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6773 index = self._index - 1 6774 6775 partition_exists = self._parse_exists() 6776 if self._match(TokenType.PARTITION, advance=False): 6777 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6778 6779 self._retreat(index) 6780 return self._parse_csv(self._parse_drop_column) 6781 6782 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6783 if self._match(TokenType.COLUMN): 6784 exists = self._parse_exists() 6785 old_column = self._parse_column() 6786 to = self._match_text_seq("TO") 6787 new_column = self._parse_column() 6788 6789 if old_column is None or to is None or new_column is None: 6790 return None 6791 6792 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6793 6794 self._match_text_seq("TO") 6795 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6796 6797 def _parse_alter_table_set(self) -> exp.AlterSet: 6798 alter_set = self.expression(exp.AlterSet) 6799 6800 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6801 "TABLE", "PROPERTIES" 6802 ): 6803 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6804 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6805 alter_set.set("expressions", [self._parse_assignment()]) 6806 elif self._match_texts(("LOGGED", "UNLOGGED")): 6807 alter_set.set("option", exp.var(self._prev.text.upper())) 6808 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6809 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6810 elif self._match_text_seq("LOCATION"): 6811 alter_set.set("location", self._parse_field()) 6812 elif self._match_text_seq("ACCESS", "METHOD"): 6813 alter_set.set("access_method", self._parse_field()) 6814 elif self._match_text_seq("TABLESPACE"): 6815 alter_set.set("tablespace", self._parse_field()) 6816 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6817 alter_set.set("file_format", [self._parse_field()]) 6818 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6819 alter_set.set("file_format", self._parse_wrapped_options()) 6820 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6821 alter_set.set("copy_options", self._parse_wrapped_options()) 6822 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6823 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6824 else: 6825 if self._match_text_seq("SERDE"): 6826 alter_set.set("serde", self._parse_field()) 6827 6828 alter_set.set("expressions", [self._parse_properties()]) 6829 6830 return 
alter_set 6831 6832 def _parse_alter(self) -> exp.Alter | exp.Command: 6833 start = self._prev 6834 6835 alter_token = self._match_set(self.ALTERABLES) and self._prev 6836 if not alter_token: 6837 return self._parse_as_command(start) 6838 6839 exists = self._parse_exists() 6840 only = self._match_text_seq("ONLY") 6841 this = self._parse_table(schema=True) 6842 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6843 6844 if self._next: 6845 self._advance() 6846 6847 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6848 if parser: 6849 actions = ensure_list(parser(self)) 6850 not_valid = self._match_text_seq("NOT", "VALID") 6851 options = self._parse_csv(self._parse_property) 6852 6853 if not self._curr and actions: 6854 return self.expression( 6855 exp.Alter, 6856 this=this, 6857 kind=alter_token.text.upper(), 6858 exists=exists, 6859 actions=actions, 6860 only=only, 6861 options=options, 6862 cluster=cluster, 6863 not_valid=not_valid, 6864 ) 6865 6866 return self._parse_as_command(start) 6867 6868 def _parse_merge(self) -> exp.Merge: 6869 self._match(TokenType.INTO) 6870 target = self._parse_table() 6871 6872 if target and self._match(TokenType.ALIAS, advance=False): 6873 target.set("alias", self._parse_table_alias()) 6874 6875 self._match(TokenType.USING) 6876 using = self._parse_table() 6877 6878 self._match(TokenType.ON) 6879 on = self._parse_assignment() 6880 6881 return self.expression( 6882 exp.Merge, 6883 this=target, 6884 using=using, 6885 on=on, 6886 expressions=self._parse_when_matched(), 6887 returning=self._parse_returning(), 6888 ) 6889 6890 def _parse_when_matched(self) -> t.List[exp.When]: 6891 whens = [] 6892 6893 while self._match(TokenType.WHEN): 6894 matched = not self._match(TokenType.NOT) 6895 self._match_text_seq("MATCHED") 6896 source = ( 6897 False 6898 if self._match_text_seq("BY", "TARGET") 6899 else self._match_text_seq("BY", "SOURCE") 6900 ) 6901 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6902 6903 self._match(TokenType.THEN) 6904 6905 if self._match(TokenType.INSERT): 6906 this = self._parse_star() 6907 if this: 6908 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6909 else: 6910 then = self.expression( 6911 exp.Insert, 6912 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6913 expression=self._match_text_seq("VALUES") and self._parse_value(), 6914 ) 6915 elif self._match(TokenType.UPDATE): 6916 expressions = self._parse_star() 6917 if expressions: 6918 then = self.expression(exp.Update, expressions=expressions) 6919 else: 6920 then = self.expression( 6921 exp.Update, 6922 expressions=self._match(TokenType.SET) 6923 and self._parse_csv(self._parse_equality), 6924 ) 6925 elif self._match(TokenType.DELETE): 6926 then = self.expression(exp.Var, this=self._prev.text) 6927 else: 6928 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6929 6930 whens.append( 6931 self.expression( 6932 exp.When, 6933 matched=matched, 6934 source=source, 6935 condition=condition, 6936 then=then, 6937 ) 6938 ) 6939 return whens 6940 6941 def _parse_show(self) -> t.Optional[exp.Expression]: 6942 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6943 if parser: 6944 return parser(self) 6945 return self._parse_as_command(self._prev) 6946 6947 def _parse_set_item_assignment( 6948 self, kind: t.Optional[str] = None 6949 ) -> t.Optional[exp.Expression]: 6950 index = self._index 6951 6952 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 6953 return self._parse_set_transaction(global_=kind == "GLOBAL") 6954 6955 left = self._parse_primary() or self._parse_column() 6956 assignment_delimiter = self._match_texts(("=", "TO")) 6957 6958 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6959 self._retreat(index) 6960 return None 6961 6962 right = self._parse_statement() or self._parse_id_var() 6963 if isinstance(right, (exp.Column, exp.Identifier)): 6964 right = exp.var(right.name) 6965 6966 this = self.expression(exp.EQ, this=left, expression=right) 6967 return self.expression(exp.SetItem, this=this, kind=kind) 6968 6969 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6970 self._match_text_seq("TRANSACTION") 6971 characteristics = self._parse_csv( 6972 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6973 ) 6974 return self.expression( 6975 exp.SetItem, 6976 expressions=characteristics, 6977 kind="TRANSACTION", 6978 **{"global": global_}, # type: ignore 6979 ) 6980 6981 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6982 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6983 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6984 6985 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6986 index = self._index 6987 set_ = self.expression( 6988 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6989 ) 6990 6991 if self._curr: 6992 self._retreat(index) 6993 return self._parse_as_command(self._prev) 6994 6995 return set_ 6996 6997 def _parse_var_from_options( 6998 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6999 ) -> t.Optional[exp.Var]: 7000 start = self._curr 7001 if not start: 7002 return None 7003 7004 option = start.text.upper() 7005 continuations = options.get(option) 7006 7007 index = self._index 7008 self._advance() 7009 for keywords in continuations or []: 7010 if isinstance(keywords, str): 7011 keywords = (keywords,) 7012 7013 if self._match_text_seq(*keywords): 7014 option = f"{option} {' '.join(keywords)}" 7015 break 7016 else: 7017 if continuations or continuations is None: 7018 if raise_unmatched: 7019 self.raise_error(f"Unknown option {option}") 7020 7021 self._retreat(index) 7022 return None 7023 7024 return exp.var(option) 7025 7026 def _parse_as_command(self, start: Token) -> exp.Command: 7027 while self._curr: 7028 self._advance() 7029 text = self._find_sql(start, self._prev) 7030 size = len(start.text) 7031 self._warn_unsupported() 7032 return exp.Command(this=text[:size], expression=text[size:]) 7033 7034 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7035 settings = [] 7036 7037 self._match_l_paren() 7038 kind = self._parse_id_var() 7039 7040 if self._match(TokenType.L_PAREN): 7041 while True: 7042 key = self._parse_id_var() 7043 value = self._parse_primary() 7044 7045 if not key and value is None: 7046 break 7047 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7048 self._match(TokenType.R_PAREN) 7049 7050 self._match_r_paren() 7051 7052 return self.expression( 7053 exp.DictProperty, 7054 this=this, 7055 kind=kind.this if kind else None, 7056 settings=settings, 7057 ) 7058 7059 def _parse_dict_range(self, this: str) -> exp.DictRange: 7060 self._match_l_paren() 7061 has_min = self._match_text_seq("MIN") 7062 if has_min: 7063 min = self._parse_var() or self._parse_primary() 7064 self._match_text_seq("MAX") 7065 max = 
self._parse_var() or self._parse_primary() 7066 else: 7067 max = self._parse_var() or self._parse_primary() 7068 min = exp.Literal.number(0) 7069 self._match_r_paren() 7070 return self.expression(exp.DictRange, this=this, min=min, max=max) 7071 7072 def _parse_comprehension( 7073 self, this: t.Optional[exp.Expression] 7074 ) -> t.Optional[exp.Comprehension]: 7075 index = self._index 7076 expression = self._parse_column() 7077 if not self._match(TokenType.IN): 7078 self._retreat(index - 1) 7079 return None 7080 iterator = self._parse_column() 7081 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7082 return self.expression( 7083 exp.Comprehension, 7084 this=this, 7085 expression=expression, 7086 iterator=iterator, 7087 condition=condition, 7088 ) 7089 7090 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7091 if self._match(TokenType.HEREDOC_STRING): 7092 return self.expression(exp.Heredoc, this=self._prev.text) 7093 7094 if not self._match_text_seq("$"): 7095 return None 7096 7097 tags = ["$"] 7098 tag_text = None 7099 7100 if self._is_connected(): 7101 self._advance() 7102 tags.append(self._prev.text.upper()) 7103 else: 7104 self.raise_error("No closing $ found") 7105 7106 if tags[-1] != "$": 7107 if self._is_connected() and self._match_text_seq("$"): 7108 tag_text = tags[-1] 7109 tags.append("$") 7110 else: 7111 self.raise_error("No closing $ found") 7112 7113 heredoc_start = self._curr 7114 7115 while self._curr: 7116 if self._match_text_seq(*tags, advance=False): 7117 this = self._find_sql(heredoc_start, self._prev) 7118 self._advance(len(tags)) 7119 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7120 7121 self._advance() 7122 7123 self.raise_error(f"No closing {''.join(tags)} found") 7124 return None 7125 7126 def _find_parser( 7127 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7128 ) -> t.Optional[t.Callable]: 7129 if not self._curr: 7130 return None 7131 7132 index = self._index 7133 this = [] 7134 while True: 7135 # The current token might be multiple words 7136 curr = self._curr.text.upper() 7137 key = curr.split(" ") 7138 this.append(curr) 7139 7140 self._advance() 7141 result, trie = in_trie(trie, key) 7142 if result == TrieResult.FAILED: 7143 break 7144 7145 if result == TrieResult.EXISTS: 7146 subparser = parsers[" ".join(this)] 7147 return subparser 7148 7149 self._retreat(index) 7150 return None 7151 7152 def _match(self, token_type, advance=True, expression=None): 7153 if not self._curr: 7154 return None 7155 7156 if self._curr.token_type == token_type: 7157 if advance: 7158 self._advance() 7159 self._add_comments(expression) 7160 return True 7161 7162 return None 7163 7164 def _match_set(self, types, advance=True): 7165 if not self._curr: 7166 return None 7167 7168 if self._curr.token_type in types: 7169 if advance: 7170 self._advance() 7171 return True 7172 7173 return None 7174 7175 def _match_pair(self, token_type_a, token_type_b, advance=True): 7176 if not self._curr or not self._next: 7177 return None 7178 7179 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7180 if advance: 7181 self._advance(2) 7182 return True 7183 7184 return None 7185 7186 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7187 if not self._match(TokenType.L_PAREN, expression=expression): 7188 self.raise_error("Expecting (") 7189 7190 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7191 if not self._match(TokenType.R_PAREN, expression=expression): 
7192 self.raise_error("Expecting )") 7193 7194 def _match_texts(self, texts, advance=True): 7195 if ( 7196 self._curr 7197 and self._curr.token_type != TokenType.STRING 7198 and self._curr.text.upper() in texts 7199 ): 7200 if advance: 7201 self._advance() 7202 return True 7203 return None 7204 7205 def _match_text_seq(self, *texts, advance=True): 7206 index = self._index 7207 for text in texts: 7208 if ( 7209 self._curr 7210 and self._curr.token_type != TokenType.STRING 7211 and self._curr.text.upper() == text 7212 ): 7213 self._advance() 7214 else: 7215 self._retreat(index) 7216 return None 7217 7218 if not advance: 7219 self._retreat(index) 7220 7221 return True 7222 7223 def _replace_lambda( 7224 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7225 ) -> t.Optional[exp.Expression]: 7226 if not node: 7227 return node 7228 7229 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7230 7231 for column in node.find_all(exp.Column): 7232 typ = lambda_types.get(column.parts[0].name) 7233 if typ is not None: 7234 dot_or_id = column.to_dot() if column.table else column.this 7235 7236 if typ: 7237 dot_or_id = self.expression( 7238 exp.Cast, 7239 this=dot_or_id, 7240 to=typ, 7241 ) 7242 7243 parent = column.parent 7244 7245 while isinstance(parent, exp.Dot): 7246 if not isinstance(parent.parent, exp.Dot): 7247 parent.replace(dot_or_id) 7248 break 7249 parent = parent.parent 7250 else: 7251 if column is node: 7252 node = dot_or_id 7253 else: 7254 column.replace(dot_or_id) 7255 return node 7256 7257 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7258 start = self._prev 7259 7260 # Not to be confused with TRUNCATE(number, decimals) function call 7261 if self._match(TokenType.L_PAREN): 7262 self._retreat(self._index - 2) 7263 return self._parse_function() 7264 7265 # Clickhouse supports TRUNCATE DATABASE as well 7266 is_database = self._match(TokenType.DATABASE) 7267 7268 self._match(TokenType.TABLE) 7269 7270 exists = self._parse_exists(not_=False) 7271 7272 expressions = self._parse_csv( 7273 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7274 ) 7275 7276 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7277 7278 if self._match_text_seq("RESTART", "IDENTITY"): 7279 identity = "RESTART" 7280 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7281 identity = "CONTINUE" 7282 else: 7283 identity = None 7284 7285 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7286 option = self._prev.text 7287 else: 7288 option = None 7289 7290 partition = self._parse_partition() 7291 7292 # Fallback case 7293 if self._curr: 7294 return self._parse_as_command(start) 7295 7296 return self.expression( 7297 exp.TruncateTable, 7298 expressions=expressions, 7299 is_database=is_database, 7300 exists=exists, 7301 cluster=cluster, 7302 identity=identity, 7303 option=option, 7304 partition=partition, 7305 ) 7306 7307 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7308 this = self._parse_ordered(self._parse_opclass) 7309 7310 if not self._match(TokenType.WITH): 7311 return this 7312 7313 op = self._parse_var(any_token=True) 7314 7315 return self.expression(exp.WithOperator, this=this, op=op) 7316 7317 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7318 self._match(TokenType.EQ) 7319 self._match(TokenType.L_PAREN) 7320 7321 opts: t.List[t.Optional[exp.Expression]] = [] 7322 while self._curr and not self._match(TokenType.R_PAREN): 7323 if 
self._match_text_seq("FORMAT_NAME", "="): 7324 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7325 # so we parse it separately to use _parse_field() 7326 prop = self.expression( 7327 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7328 ) 7329 opts.append(prop) 7330 else: 7331 opts.append(self._parse_property()) 7332 7333 self._match(TokenType.COMMA) 7334 7335 return opts 7336 7337 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7338 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7339 7340 options = [] 7341 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7342 option = self._parse_var(any_token=True) 7343 prev = self._prev.text.upper() 7344 7345 # Different dialects might separate options and values by white space, "=" and "AS" 7346 self._match(TokenType.EQ) 7347 self._match(TokenType.ALIAS) 7348 7349 param = self.expression(exp.CopyParameter, this=option) 7350 7351 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7352 TokenType.L_PAREN, advance=False 7353 ): 7354 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7355 param.set("expressions", self._parse_wrapped_options()) 7356 elif prev == "FILE_FORMAT": 7357 # T-SQL's external file format case 7358 param.set("expression", self._parse_field()) 7359 else: 7360 param.set("expression", self._parse_unquoted_field()) 7361 7362 options.append(param) 7363 self._match(sep) 7364 7365 return options 7366 7367 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7368 expr = self.expression(exp.Credentials) 7369 7370 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7371 expr.set("storage", self._parse_field()) 7372 if self._match_text_seq("CREDENTIALS"): 7373 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7374 creds = ( 7375 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7376 ) 7377 expr.set("credentials", creds) 7378 if self._match_text_seq("ENCRYPTION"): 7379 expr.set("encryption", self._parse_wrapped_options()) 7380 if self._match_text_seq("IAM_ROLE"): 7381 expr.set("iam_role", self._parse_field()) 7382 if self._match_text_seq("REGION"): 7383 expr.set("region", self._parse_field()) 7384 7385 return expr 7386 7387 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7388 return self._parse_field() 7389 7390 def _parse_copy(self) -> exp.Copy | exp.Command: 7391 start = self._prev 7392 7393 self._match(TokenType.INTO) 7394 7395 this = ( 7396 self._parse_select(nested=True, parse_subquery_alias=False) 7397 if self._match(TokenType.L_PAREN, advance=False) 7398 else self._parse_table(schema=True) 7399 ) 7400 7401 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7402 7403 files = self._parse_csv(self._parse_file_location) 7404 credentials = self._parse_credentials() 7405 7406 self._match_text_seq("WITH") 7407 7408 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7409 7410 # Fallback case 7411 if self._curr: 7412 return self._parse_as_command(start) 7413 7414 return self.expression( 7415 exp.Copy, 7416 this=this, 7417 kind=kind, 7418 credentials=credentials, 7419 files=files, 7420 params=params, 7421 ) 7422 7423 def _parse_normalize(self) -> exp.Normalize: 7424 return self.expression( 7425 exp.Normalize, 7426 this=self._parse_bitwise(), 7427 form=self._match(TokenType.COMMA) and self._parse_var(), 7428 ) 7429 7430 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7431 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7432 this = self._parse_function() 7433 if isinstance(this, exp.Columns): 7434 this.set("unpack", True) 7435 return this 7436 7437 return self.expression( 7438 exp.Star, 7439 **{ # type: ignore 7440 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7441 "replace": self._parse_star_op("REPLACE"), 7442 "rename": self._parse_star_op("RENAME"), 7443 }, 7444 ) 7445 7446 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7447 privilege_parts = [] 7448 7449 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7450 # (end of privilege list) or L_PAREN (start of column list) are met 7451 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7452 privilege_parts.append(self._curr.text.upper()) 7453 self._advance() 7454 7455 this = exp.var(" ".join(privilege_parts)) 7456 expressions = ( 7457 self._parse_wrapped_csv(self._parse_column) 7458 if self._match(TokenType.L_PAREN, advance=False) 7459 else None 7460 ) 7461 7462 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7463 7464 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7465 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7466 principal = self._parse_id_var() 7467 7468 if not principal: 7469 return None 7470 7471 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7472 7473 def _parse_grant(self) -> exp.Grant | exp.Command: 7474 start = self._prev 7475 7476 privileges = self._parse_csv(self._parse_grant_privilege) 7477 7478 self._match(TokenType.ON) 7479 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7480 7481 # Attempt to parse the securable e.g. MySQL allows names 7482 # such as "foo.*", "*.*" which are not easily parseable yet 7483 securable = self._try_parse(self._parse_table_parts) 7484 7485 if not securable or not self._match_text_seq("TO"): 7486 return self._parse_as_command(start) 7487 7488 principals = self._parse_csv(self._parse_grant_principal) 7489 7490 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7491 7492 if self._curr: 7493 return self._parse_as_command(start) 7494 7495 return self.expression( 7496 exp.Grant, 7497 privileges=privileges, 7498 kind=kind, 7499 securable=securable, 7500 principals=principals, 7501 grant_option=grant_option, 7502 ) 7503 7504 def _parse_overlay(self) -> exp.Overlay: 7505 return self.expression( 7506 exp.Overlay, 7507 **{ # type: ignore 7508 "this": self._parse_bitwise(), 7509 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7510 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7511 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7512 }, 7513 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- dialect: The dialect to parse against; accepts anything Dialect.get_or_raise understands (a name, a Dialect instance, or a Dialect subclass). Default: None, i.e. the base dialect.
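A minimal construction sketch (in typical use a Parser comes from a Dialect or the top-level sqlglot.parse/parse_one helpers rather than being instantiated directly):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to max_errors problems and raise them together as one
    # ParseError, instead of raising on the first one (IMMEDIATE).
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)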
1365 def __init__( 1366 self, 1367 error_level: t.Optional[ErrorLevel] = None, 1368 error_message_context: int = 100, 1369 max_errors: int = 3, 1370 dialect: DialectType = None, 1371 ): 1372 from sqlglot.dialects import Dialect 1373 1374 self.error_level = error_level or ErrorLevel.IMMEDIATE 1375 self.error_message_context = error_message_context 1376 self.max_errors = max_errors 1377 self.dialect = Dialect.get_or_raise(dialect) 1378 self.reset()
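As a hedged sketch of the dialect parameter: the usual factory is Dialect.parser, which resolves the dialect and instantiates that dialect's parser_class (a Parser subclass), rather than the base Parser:

    from sqlglot.dialects import Dialect

    # Resolves the name via Dialect.get_or_raise and builds the
    # dialect-specific Parser subclass bound to that dialect.
    parser = Dialect.get_or_raise("duckdb").parser()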
1390 def parse( 1391 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1392 ) -> t.List[t.Optional[exp.Expression]]: 1393 """ 1394 Parses a list of tokens and returns a list of syntax trees, one tree 1395 per parsed SQL statement. 1396 1397 Args: 1398 raw_tokens: The list of tokens. 1399 sql: The original SQL string, used to produce helpful debug messages. 1400 1401 Returns: 1402 The list of the produced syntax trees. 1403 """ 1404 return self._parse( 1405 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1406 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
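An end-to-end sketch, assuming the default dialect's Tokenizer:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)

    # One syntax tree per statement; passing sql yields better error messages.
    trees = Parser().parse(tokens, sql=sql)
    assert len(trees) == 2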
1408 def parse_into( 1409 self, 1410 expression_types: exp.IntoType, 1411 raw_tokens: t.List[Token], 1412 sql: t.Optional[str] = None, 1413 ) -> t.List[t.Optional[exp.Expression]]: 1414 """ 1415 Parses a list of tokens into a given Expression type. If a collection of Expression 1416 types is given instead, this method will try to parse the token list into each one 1417 of them, stopping at the first for which the parsing succeeds. 1418 1419 Args: 1420 expression_types: The expression type(s) to try and parse the token list into. 1421 raw_tokens: The list of tokens. 1422 sql: The original SQL string, used to produce helpful debug messages. 1423 1424 Returns: 1425 The target Expression. 1426 """ 1427 errors = [] 1428 for expression_type in ensure_list(expression_types): 1429 parser = self.EXPRESSION_PARSERS.get(expression_type) 1430 if not parser: 1431 raise TypeError(f"No parser registered for {expression_type}") 1432 1433 try: 1434 return self._parse(parser, raw_tokens, sql) 1435 except ParseError as e: 1436 e.errors[0]["into_expression"] = expression_type 1437 errors.append(e) 1438 1439 raise ParseError( 1440 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1441 errors=merge_errors(errors), 1442 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
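For illustration, assuming exp.Select is registered in this parser's EXPRESSION_PARSERS (it is in the base Parser):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Parses with the SELECT statement parser; a sequence of types would
    # be tried left to right until one succeeds.
    select = Parser().parse_into(exp.Select, tokens, sql=sql)[0]
    assert isinstance(select, exp.Select)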
1482 def check_errors(self) -> None: 1483 """Logs or raises any found errors, depending on the chosen error level setting.""" 1484 if self.error_level == ErrorLevel.WARN: 1485 for error in self.errors: 1486 logger.error(str(error)) 1487 elif self.error_level == ErrorLevel.RAISE and self.errors: 1488 raise ParseError( 1489 concat_messages(self.errors, self.max_errors), 1490 errors=merge_errors(self.errors), 1491 )
Logs or raises any found errors, depending on the chosen error level setting.
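check_errors runs at the end of each internal _parse call, so the error level chosen at construction decides what happens to accumulated errors; a sketch (the invalid statement here is only illustrative):

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.RAISE)
    try:
        parser.parse(Tokenizer().tokenize("SELECT * FROM"))
    except ParseError:
        # With RAISE, up to max_errors messages are concatenated into one
        # ParseError; with WARN they would only be logged.
        pass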
1493 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1494 """ 1495 Appends an error in the list of recorded errors or raises it, depending on the chosen 1496 error level setting. 1497 """ 1498 token = token or self._curr or self._prev or Token.string("") 1499 start = token.start 1500 end = token.end + 1 1501 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1502 highlight = self.sql[start:end] 1503 end_context = self.sql[end : end + self.error_message_context] 1504 1505 error = ParseError.new( 1506 f"{message}. Line {token.line}, Col: {token.col}.\n" 1507 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1508 description=message, 1509 line=token.line, 1510 col=token.col, 1511 start_context=start_context, 1512 highlight=highlight, 1513 end_context=end_context, 1514 ) 1515 1516 if self.error_level == ErrorLevel.IMMEDIATE: 1517 raise error 1518 1519 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
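The structured fields passed to ParseError.new are also available on the raised error; a hedged sketch, reusing an invalid input:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    try:
        Parser(error_level=ErrorLevel.IMMEDIATE).parse(
            Tokenizer().tokenize("SELECT * FROM")
        )
    except ParseError as e:
        info = e.errors[0]
        # Keys mirror ParseError.new: description, line, col,
        # start_context, highlight, end_context.
        print(info["line"], info["col"], info["highlight"])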
1521 def expression( 1522 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1523 ) -> E: 1524 """ 1525 Creates a new, validated Expression. 1526 1527 Args: 1528 exp_class: The expression class to instantiate. 1529 comments: An optional list of comments to attach to the expression. 1530 kwargs: The arguments to set for the expression along with their respective values. 1531 1532 Returns: 1533 The target expression. 1534 """ 1535 instance = exp_class(**kwargs) 1536 instance.add_comments(comments) if comments else self._add_comments(instance) 1537 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
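expression is intended for use inside parsing methods; a hypothetical extension sketch (_parse_my_hint and the node it builds are illustrative, not part of sqlglot):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import TokenType

    class MyParser(Parser):
        def _parse_my_hint(self):
            # Builds a validated node; when no comments are passed, any
            # pending comments are attached via _add_comments.
            if not self._match(TokenType.VAR):
                return None
            return self.expression(exp.Var, this=self._prev.text.upper())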
1544 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1545 """ 1546 Validates an Expression, making sure that all its mandatory arguments are set. 1547 1548 Args: 1549 expression: The expression to validate. 1550 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1551 1552 Returns: 1553 The validated expression. 1554 """ 1555 if self.error_level != ErrorLevel.IGNORE: 1556 for error_message in expression.error_messages(args): 1557 self.raise_error(error_message) 1558 1559 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
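A small sketch of the validation behavior, assuming exp.Not declares its this argument as required (it does in sqlglot's expression definitions):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.RAISE)
    parser.validate_expression(exp.Not())  # "this" is missing

    # With RAISE the message is recorded rather than raised immediately;
    # it would surface later through check_errors.
    assert parser.errors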