sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
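

# Illustrative sketch (not part of the original module): how the two builders above
# assemble nodes from an already-parsed argument list.
#
#     args = [
#         exp.Literal.string("a"),
#         exp.Literal.number(1),
#         exp.Literal.string("b"),
#         exp.Literal.number(2),
#     ]
#     build_var_map(args)  # VarMap(keys=ARRAY('a', 'b'), values=ARRAY(1, 2))
#
# build_like swaps its first two arguments because, in function form (e.g. SQLite's
# LIKE(pattern, subject)), the pattern comes first, while exp.Like stores the subject
# in `this`:
#
#     build_like([exp.Literal.string("a%"), exp.column("x")])  # x LIKE 'a%'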


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
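

# Illustrative sketch (not part of the original module): LOG's argument order is
# dialect-dependent, which is what build_logarithm normalizes.
#
#     args = [exp.Literal.number(10), exp.Literal.number(100)]
#     # With dialect.LOG_BASE_FIRST (i.e. LOG(base, expr)):
#     #     Log(this=10, expression=100)
#     # Otherwise the operands are swapped before the node is built.
#     # A single argument yields Ln or Log, depending on LOG_DEFAULTS_TO_LN.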


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
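

# Illustrative sketch (not part of the original module): the parenthesization in
# build_mod keeps operator precedence intact when MOD is rendered as %.
#
#     args = [
#         exp.Add(this=exp.column("a"), expression=exp.Literal.number(1)),
#         exp.Literal.number(7),
#     ]
#     build_mod(args).sql()  # roughly '(a + 1) % 7', rather than 'a + 1 % 7'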


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
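

# Illustrative sketch (not part of the original module): with the two-argument form,
# a dialect can inject an implicit source timezone (the "UTC" value below is a
# hypothetical default, not something this module prescribes).
#
#     build_convert_timezone(args, default_source_tz="UTC")
#     # len(args) == 2 -> ConvertTimezone(source_tz='UTC', target_tz=args[0], timestamp=args[1])
#     # otherwise the argument list is passed through via from_arg_list unchanged.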


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
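

# Illustrative sketch (not part of the original module): the tries the metaclass
# autofills let the parser match multi-word SHOW/SET keys token by token.
#
#     trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
#     in_trie(trie, ["SHOW"])            # (TrieResult.PREFIX, <subtrie>)
#     in_trie(trie, ["SHOW", "TABLES"])  # (TrieResult.EXISTS, <terminal node>)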


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
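
    # Illustrative sketch (not part of the original module): dialects customize parsing
    # by overriding class-level tables like FUNCTIONS in their Parser subclass. The
    # function name below is hypothetical:
    #
    #     class MyDialectParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "REGEXP_SUBSTRING": lambda args: exp.RegexpExtract(
    #                 this=seq_get(args, 0), expression=seq_get(args, 1)
    #             ),
    #         }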

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
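
    # Illustrative examples (not part of the original module) of how COLUMN_OPERATORS
    # is applied while a column expression is being parsed:
    #
    #     x::INT       -> exp.Cast (or exp.TryCast when STRICT_CAST is False)
    #     x -> '$.a'   -> exp.JSONExtract, path normalized via dialect.to_json_path
    #     x ->> '$.a'  -> exp.JSONExtractScalar
    #     x #> '{a}'   -> exp.JSONBExtract, path passed through as-is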

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
1086 "LIKE", 1087 "PERIOD", 1088 "PRIMARY KEY", 1089 "UNIQUE", 1090 } 1091 1092 NO_PAREN_FUNCTION_PARSERS = { 1093 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1094 "CASE": lambda self: self._parse_case(), 1095 "CONNECT_BY_ROOT": lambda self: self.expression( 1096 exp.ConnectByRoot, this=self._parse_column() 1097 ), 1098 "IF": lambda self: self._parse_if(), 1099 "NEXT": lambda self: self._parse_next_value_for(), 1100 } 1101 1102 INVALID_FUNC_NAME_TOKENS = { 1103 TokenType.IDENTIFIER, 1104 TokenType.STRING, 1105 } 1106 1107 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1108 1109 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1110 1111 FUNCTION_PARSERS = { 1112 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1113 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1114 "DECODE": lambda self: self._parse_decode(), 1115 "EXTRACT": lambda self: self._parse_extract(), 1116 "GAP_FILL": lambda self: self._parse_gap_fill(), 1117 "JSON_OBJECT": lambda self: self._parse_json_object(), 1118 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1119 "JSON_TABLE": lambda self: self._parse_json_table(), 1120 "MATCH": lambda self: self._parse_match_against(), 1121 "NORMALIZE": lambda self: self._parse_normalize(), 1122 "OPENJSON": lambda self: self._parse_open_json(), 1123 "OVERLAY": lambda self: self._parse_overlay(), 1124 "POSITION": lambda self: self._parse_position(), 1125 "PREDICT": lambda self: self._parse_predict(), 1126 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1127 "STRING_AGG": lambda self: self._parse_string_agg(), 1128 "SUBSTRING": lambda self: self._parse_substring(), 1129 "TRIM": lambda self: self._parse_trim(), 1130 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1131 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1132 } 1133 1134 QUERY_MODIFIER_PARSERS = { 1135 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1136 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1137 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1138 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1139 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1140 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1141 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1142 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1143 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1144 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1145 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1146 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1147 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1148 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1149 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1150 TokenType.CLUSTER_BY: lambda self: ( 1151 "cluster", 1152 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1153 ), 1154 TokenType.DISTRIBUTE_BY: lambda self: ( 1155 "distribute", 1156 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1157 ), 1158 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1159 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1160 TokenType.START_WITH: lambda self: ("connect", 

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }
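
    # Illustrative examples (not part of the original module) of the ODBC escape
    # syntax the mapping above supports:
    #
    #     {d '2024-01-31'}            -> exp.Date
    #     {t '12:00:00'}              -> exp.Time
    #     {ts '2024-01-31 12:00:00'}  -> exp.Timestamp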

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
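
    # Illustrative usage (not part of the original module), assuming the default dialect:
    #
    #     from sqlglot.dialects import Dialect
    #
    #     dialect = Dialect.get_or_raise(None)
    #     sql = "SELECT 1; SELECT 2"
    #     parser = dialect.parser()
    #     parser.parse(dialect.tokenize(sql), sql=sql)  # [Select(...), Select(...)]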
1439 """ 1440 errors = [] 1441 for expression_type in ensure_list(expression_types): 1442 parser = self.EXPRESSION_PARSERS.get(expression_type) 1443 if not parser: 1444 raise TypeError(f"No parser registered for {expression_type}") 1445 1446 try: 1447 return self._parse(parser, raw_tokens, sql) 1448 except ParseError as e: 1449 e.errors[0]["into_expression"] = expression_type 1450 errors.append(e) 1451 1452 raise ParseError( 1453 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1454 errors=merge_errors(errors), 1455 ) from errors[-1] 1456 1457 def _parse( 1458 self, 1459 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1460 raw_tokens: t.List[Token], 1461 sql: t.Optional[str] = None, 1462 ) -> t.List[t.Optional[exp.Expression]]: 1463 self.reset() 1464 self.sql = sql or "" 1465 1466 total = len(raw_tokens) 1467 chunks: t.List[t.List[Token]] = [[]] 1468 1469 for i, token in enumerate(raw_tokens): 1470 if token.token_type == TokenType.SEMICOLON: 1471 if token.comments: 1472 chunks.append([token]) 1473 1474 if i < total - 1: 1475 chunks.append([]) 1476 else: 1477 chunks[-1].append(token) 1478 1479 expressions = [] 1480 1481 for tokens in chunks: 1482 self._index = -1 1483 self._tokens = tokens 1484 self._advance() 1485 1486 expressions.append(parse_method(self)) 1487 1488 if self._index < len(self._tokens): 1489 self.raise_error("Invalid expression / Unexpected token") 1490 1491 self.check_errors() 1492 1493 return expressions 1494 1495 def check_errors(self) -> None: 1496 """Logs or raises any found errors, depending on the chosen error level setting.""" 1497 if self.error_level == ErrorLevel.WARN: 1498 for error in self.errors: 1499 logger.error(str(error)) 1500 elif self.error_level == ErrorLevel.RAISE and self.errors: 1501 raise ParseError( 1502 concat_messages(self.errors, self.max_errors), 1503 errors=merge_errors(self.errors), 1504 ) 1505 1506 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1507 """ 1508 Appends an error in the list of recorded errors or raises it, depending on the chosen 1509 error level setting. 1510 """ 1511 token = token or self._curr or self._prev or Token.string("") 1512 start = token.start 1513 end = token.end + 1 1514 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1515 highlight = self.sql[start:end] 1516 end_context = self.sql[end : end + self.error_message_context] 1517 1518 error = ParseError.new( 1519 f"{message}. Line {token.line}, Col: {token.col}.\n" 1520 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1521 description=message, 1522 line=token.line, 1523 col=token.col, 1524 start_context=start_context, 1525 highlight=highlight, 1526 end_context=end_context, 1527 ) 1528 1529 if self.error_level == ErrorLevel.IMMEDIATE: 1530 raise error 1531 1532 self.errors.append(error) 1533 1534 def expression( 1535 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1536 ) -> E: 1537 """ 1538 Creates a new, validated Expression. 1539 1540 Args: 1541 exp_class: The expression class to instantiate. 1542 comments: An optional list of comments to attach to the expression. 1543 kwargs: The arguments to set for the expression along with their respective values. 1544 1545 Returns: 1546 The target expression. 
1547 """ 1548 instance = exp_class(**kwargs) 1549 instance.add_comments(comments) if comments else self._add_comments(instance) 1550 return self.validate_expression(instance) 1551 1552 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1553 if expression and self._prev_comments: 1554 expression.add_comments(self._prev_comments) 1555 self._prev_comments = None 1556 1557 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1558 """ 1559 Validates an Expression, making sure that all its mandatory arguments are set. 1560 1561 Args: 1562 expression: The expression to validate. 1563 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1564 1565 Returns: 1566 The validated expression. 1567 """ 1568 if self.error_level != ErrorLevel.IGNORE: 1569 for error_message in expression.error_messages(args): 1570 self.raise_error(error_message) 1571 1572 return expression 1573 1574 def _find_sql(self, start: Token, end: Token) -> str: 1575 return self.sql[start.start : end.end + 1] 1576 1577 def _is_connected(self) -> bool: 1578 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1579 1580 def _advance(self, times: int = 1) -> None: 1581 self._index += times 1582 self._curr = seq_get(self._tokens, self._index) 1583 self._next = seq_get(self._tokens, self._index + 1) 1584 1585 if self._index > 0: 1586 self._prev = self._tokens[self._index - 1] 1587 self._prev_comments = self._prev.comments 1588 else: 1589 self._prev = None 1590 self._prev_comments = None 1591 1592 def _retreat(self, index: int) -> None: 1593 if index != self._index: 1594 self._advance(index - self._index) 1595 1596 def _warn_unsupported(self) -> None: 1597 if len(self._tokens) <= 1: 1598 return 1599 1600 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1601 # interested in emitting a warning for the one being currently processed. 1602 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1603 1604 logger.warning( 1605 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1606 ) 1607 1608 def _parse_command(self) -> exp.Command: 1609 self._warn_unsupported() 1610 return self.expression( 1611 exp.Command, 1612 comments=self._prev_comments, 1613 this=self._prev.text.upper(), 1614 expression=self._parse_string(), 1615 ) 1616 1617 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1618 """ 1619 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
self._prev 1729 temporary = self._match(TokenType.TEMPORARY) 1730 materialized = self._match_text_seq("MATERIALIZED") 1731 1732 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1733 if not kind: 1734 return self._parse_as_command(start) 1735 1736 concurrently = self._match_text_seq("CONCURRENTLY") 1737 if_exists = exists or self._parse_exists() 1738 table = self._parse_table_parts( 1739 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1740 ) 1741 1742 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1743 1744 if self._match(TokenType.L_PAREN, advance=False): 1745 expressions = self._parse_wrapped_csv(self._parse_types) 1746 else: 1747 expressions = None 1748 1749 return self.expression( 1750 exp.Drop, 1751 comments=start.comments, 1752 exists=if_exists, 1753 this=table, 1754 expressions=expressions, 1755 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1756 temporary=temporary, 1757 materialized=materialized, 1758 cascade=self._match_text_seq("CASCADE"), 1759 constraints=self._match_text_seq("CONSTRAINTS"), 1760 purge=self._match_text_seq("PURGE"), 1761 cluster=cluster, 1762 concurrently=concurrently, 1763 ) 1764 1765 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1766 return ( 1767 self._match_text_seq("IF") 1768 and (not not_ or self._match(TokenType.NOT)) 1769 and self._match(TokenType.EXISTS) 1770 ) 1771 1772 def _parse_create(self) -> exp.Create | exp.Command: 1773 # Note: this can't be None because we've matched a statement parser 1774 start = self._prev 1775 comments = self._prev_comments 1776 1777 replace = ( 1778 start.token_type == TokenType.REPLACE 1779 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1780 or self._match_pair(TokenType.OR, TokenType.ALTER) 1781 ) 1782 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1783 1784 unique = self._match(TokenType.UNIQUE) 1785 1786 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1787 clustered = True 1788 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1789 "COLUMNSTORE" 1790 ): 1791 clustered = False 1792 else: 1793 clustered = None 1794 1795 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1796 self._advance() 1797 1798 properties = None 1799 create_token = self._match_set(self.CREATABLES) and self._prev 1800 1801 if not create_token: 1802 # exp.Properties.Location.POST_CREATE 1803 properties = self._parse_properties() 1804 create_token = self._match_set(self.CREATABLES) and self._prev 1805 1806 if not properties or not create_token: 1807 return self._parse_as_command(start) 1808 1809 concurrently = self._match_text_seq("CONCURRENTLY") 1810 exists = self._parse_exists(not_=True) 1811 this = None 1812 expression: t.Optional[exp.Expression] = None 1813 indexes = None 1814 no_schema_binding = None 1815 begin = None 1816 end = None 1817 clone = None 1818 1819 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1820 nonlocal properties 1821 if properties and temp_props: 1822 properties.expressions.extend(temp_props.expressions) 1823 elif temp_props: 1824 properties = temp_props 1825 1826 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1827 this = self._parse_user_defined_function(kind=create_token.token_type) 1828 1829 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1830 extend_props(self._parse_properties()) 1831 1832 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1833 
extend_props(self._parse_properties()) 1834 1835 if not expression: 1836 if self._match(TokenType.COMMAND): 1837 expression = self._parse_as_command(self._prev) 1838 else: 1839 begin = self._match(TokenType.BEGIN) 1840 return_ = self._match_text_seq("RETURN") 1841 1842 if self._match(TokenType.STRING, advance=False): 1843 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1844 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1845 expression = self._parse_string() 1846 extend_props(self._parse_properties()) 1847 else: 1848 expression = self._parse_user_defined_function_expression() 1849 1850 end = self._match_text_seq("END") 1851 1852 if return_: 1853 expression = self.expression(exp.Return, this=expression) 1854 elif create_token.token_type == TokenType.INDEX: 1855 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1856 if not self._match(TokenType.ON): 1857 index = self._parse_id_var() 1858 anonymous = False 1859 else: 1860 index = None 1861 anonymous = True 1862 1863 this = self._parse_index(index=index, anonymous=anonymous) 1864 elif create_token.token_type in self.DB_CREATABLES: 1865 table_parts = self._parse_table_parts( 1866 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1867 ) 1868 1869 # exp.Properties.Location.POST_NAME 1870 self._match(TokenType.COMMA) 1871 extend_props(self._parse_properties(before=True)) 1872 1873 this = self._parse_schema(this=table_parts) 1874 1875 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1876 extend_props(self._parse_properties()) 1877 1878 self._match(TokenType.ALIAS) 1879 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1880 # exp.Properties.Location.POST_ALIAS 1881 extend_props(self._parse_properties()) 1882 1883 if create_token.token_type == TokenType.SEQUENCE: 1884 expression = self._parse_types() 1885 extend_props(self._parse_properties()) 1886 else: 1887 expression = self._parse_ddl_select() 1888 1889 if create_token.token_type == TokenType.TABLE: 1890 # exp.Properties.Location.POST_EXPRESSION 1891 extend_props(self._parse_properties()) 1892 1893 indexes = [] 1894 while True: 1895 index = self._parse_index() 1896 1897 # exp.Properties.Location.POST_INDEX 1898 extend_props(self._parse_properties()) 1899 if not index: 1900 break 1901 else: 1902 self._match(TokenType.COMMA) 1903 indexes.append(index) 1904 elif create_token.token_type == TokenType.VIEW: 1905 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1906 no_schema_binding = True 1907 1908 shallow = self._match_text_seq("SHALLOW") 1909 1910 if self._match_texts(self.CLONE_KEYWORDS): 1911 copy = self._prev.text.lower() == "copy" 1912 clone = self.expression( 1913 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1914 ) 1915 1916 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1917 return self._parse_as_command(start) 1918 1919 create_kind_text = create_token.text.upper() 1920 return self.expression( 1921 exp.Create, 1922 comments=comments, 1923 this=this, 1924 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1925 replace=replace, 1926 refresh=refresh, 1927 unique=unique, 1928 expression=expression, 1929 exists=exists, 1930 properties=properties, 1931 indexes=indexes, 1932 no_schema_binding=no_schema_binding, 1933 begin=begin, 1934 end=end, 1935 clone=clone, 1936 concurrently=concurrently, 1937
clustered=clustered, 1938 ) 1939 1940 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1941 seq = exp.SequenceProperties() 1942 1943 options = [] 1944 index = self._index 1945 1946 while self._curr: 1947 self._match(TokenType.COMMA) 1948 if self._match_text_seq("INCREMENT"): 1949 self._match_text_seq("BY") 1950 self._match_text_seq("=") 1951 seq.set("increment", self._parse_term()) 1952 elif self._match_text_seq("MINVALUE"): 1953 seq.set("minvalue", self._parse_term()) 1954 elif self._match_text_seq("MAXVALUE"): 1955 seq.set("maxvalue", self._parse_term()) 1956 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1957 self._match_text_seq("=") 1958 seq.set("start", self._parse_term()) 1959 elif self._match_text_seq("CACHE"): 1960 # T-SQL allows empty CACHE which is initialized dynamically 1961 seq.set("cache", self._parse_number() or True) 1962 elif self._match_text_seq("OWNED", "BY"): 1963 # "OWNED BY NONE" is the default 1964 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1965 else: 1966 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1967 if opt: 1968 options.append(opt) 1969 else: 1970 break 1971 1972 seq.set("options", options if options else None) 1973 return None if self._index == index else seq 1974 1975 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1976 # only used for teradata currently 1977 self._match(TokenType.COMMA) 1978 1979 kwargs = { 1980 "no": self._match_text_seq("NO"), 1981 "dual": self._match_text_seq("DUAL"), 1982 "before": self._match_text_seq("BEFORE"), 1983 "default": self._match_text_seq("DEFAULT"), 1984 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1985 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1986 "after": self._match_text_seq("AFTER"), 1987 "minimum": self._match_texts(("MIN", "MINIMUM")), 1988 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1989 } 1990 1991 if self._match_texts(self.PROPERTY_PARSERS): 1992 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1993 try: 1994 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1995 except TypeError: 1996 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1997 1998 return None 1999 2000 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2001 return self._parse_wrapped_csv(self._parse_property) 2002 2003 def _parse_property(self) -> t.Optional[exp.Expression]: 2004 if self._match_texts(self.PROPERTY_PARSERS): 2005 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2006 2007 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2008 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2009 2010 if self._match_text_seq("COMPOUND", "SORTKEY"): 2011 return self._parse_sortkey(compound=True) 2012 2013 if self._match_text_seq("SQL", "SECURITY"): 2014 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2015 2016 index = self._index 2017 key = self._parse_column() 2018 2019 if not self._match(TokenType.EQ): 2020 self._retreat(index) 2021 return self._parse_sequence_properties() 2022 2023 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2024 if isinstance(key, exp.Column): 2025 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2026 2027 value = self._parse_bitwise() or self._parse_var(any_token=True) 2028 2029 # Transform the value to exp.Var if it was parsed as 
exp.Column(exp.Identifier()) 2030 if isinstance(value, exp.Column): 2031 value = exp.var(value.name) 2032 2033 return self.expression(exp.Property, this=key, value=value) 2034 2035 def _parse_stored(self) -> exp.FileFormatProperty: 2036 self._match(TokenType.ALIAS) 2037 2038 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2039 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2040 2041 return self.expression( 2042 exp.FileFormatProperty, 2043 this=( 2044 self.expression( 2045 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2046 ) 2047 if input_format or output_format 2048 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2049 ), 2050 ) 2051 2052 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2053 field = self._parse_field() 2054 if isinstance(field, exp.Identifier) and not field.quoted: 2055 field = exp.var(field) 2056 2057 return field 2058 2059 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2060 self._match(TokenType.EQ) 2061 self._match(TokenType.ALIAS) 2062 2063 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2064 2065 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2066 properties = [] 2067 while True: 2068 if before: 2069 prop = self._parse_property_before() 2070 else: 2071 prop = self._parse_property() 2072 if not prop: 2073 break 2074 for p in ensure_list(prop): 2075 properties.append(p) 2076 2077 if properties: 2078 return self.expression(exp.Properties, expressions=properties) 2079 2080 return None 2081 2082 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2083 return self.expression( 2084 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2085 ) 2086 2087 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2088 if self._match_texts(("DEFINER", "INVOKER")): 2089 security_specifier = self._prev.text.upper() 2090 return self.expression(exp.SecurityProperty, this=security_specifier) 2091 return None 2092 2093 def _parse_settings_property(self) -> exp.SettingsProperty: 2094 return self.expression( 2095 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2096 ) 2097 2098 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2099 if self._index >= 2: 2100 pre_volatile_token = self._tokens[self._index - 2] 2101 else: 2102 pre_volatile_token = None 2103 2104 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2105 return exp.VolatileProperty() 2106 2107 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2108 2109 def _parse_retention_period(self) -> exp.Var: 2110 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2111 number = self._parse_number() 2112 number_str = f"{number} " if number else "" 2113 unit = self._parse_var(any_token=True) 2114 return exp.var(f"{number_str}{unit}") 2115 2116 def _parse_system_versioning_property( 2117 self, with_: bool = False 2118 ) -> exp.WithSystemVersioningProperty: 2119 self._match(TokenType.EQ) 2120 prop = self.expression( 2121 exp.WithSystemVersioningProperty, 2122 **{ # type: ignore 2123 "on": True, 2124 "with": with_, 2125 }, 2126 ) 2127 2128 if self._match_text_seq("OFF"): 2129 prop.set("on", False) 2130 return prop 2131 2132 self._match(TokenType.ON) 2133 if 
self._match(TokenType.L_PAREN): 2134 while self._curr and not self._match(TokenType.R_PAREN): 2135 if self._match_text_seq("HISTORY_TABLE", "="): 2136 prop.set("this", self._parse_table_parts()) 2137 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2138 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2139 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2140 prop.set("retention_period", self._parse_retention_period()) 2141 2142 self._match(TokenType.COMMA) 2143 2144 return prop 2145 2146 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2147 self._match(TokenType.EQ) 2148 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2149 prop = self.expression(exp.DataDeletionProperty, on=on) 2150 2151 if self._match(TokenType.L_PAREN): 2152 while self._curr and not self._match(TokenType.R_PAREN): 2153 if self._match_text_seq("FILTER_COLUMN", "="): 2154 prop.set("filter_column", self._parse_column()) 2155 elif self._match_text_seq("RETENTION_PERIOD", "="): 2156 prop.set("retention_period", self._parse_retention_period()) 2157 2158 self._match(TokenType.COMMA) 2159 2160 return prop 2161 2162 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2163 kind = "HASH" 2164 expressions: t.Optional[t.List[exp.Expression]] = None 2165 if self._match_text_seq("BY", "HASH"): 2166 expressions = self._parse_wrapped_csv(self._parse_id_var) 2167 elif self._match_text_seq("BY", "RANDOM"): 2168 kind = "RANDOM" 2169 2170 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2171 buckets: t.Optional[exp.Expression] = None 2172 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2173 buckets = self._parse_number() 2174 2175 return self.expression( 2176 exp.DistributedByProperty, 2177 expressions=expressions, 2178 kind=kind, 2179 buckets=buckets, 2180 order=self._parse_order(), 2181 ) 2182 2183 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2184 self._match_text_seq("KEY") 2185 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2186 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2187 2188 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2189 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2190 prop = self._parse_system_versioning_property(with_=True) 2191 self._match_r_paren() 2192 return prop 2193 2194 if self._match(TokenType.L_PAREN, advance=False): 2195 return self._parse_wrapped_properties() 2196 2197 if self._match_text_seq("JOURNAL"): 2198 return self._parse_withjournaltable() 2199 2200 if self._match_texts(self.VIEW_ATTRIBUTES): 2201 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2202 2203 if self._match_text_seq("DATA"): 2204 return self._parse_withdata(no=False) 2205 elif self._match_text_seq("NO", "DATA"): 2206 return self._parse_withdata(no=True) 2207 2208 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2209 return self._parse_serde_properties(with_=True) 2210 2211 if self._match(TokenType.SCHEMA): 2212 return self.expression( 2213 exp.WithSchemaBindingProperty, 2214 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2215 ) 2216 2217 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2218 return self.expression( 2219 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2220 ) 2221 2222 if not self._next: 2223 return None 2224 2225 return self._parse_withisolatedloading() 2226 2227 
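    # Illustrative usage sketch (an assumption for this listing, not code from the
    # module): parse_one is sqlglot's public entry point, and a T-SQL view attribute
    # such as SCHEMABINDING should be routed through the VIEW_ATTRIBUTES branch of
    # _parse_with_property above.
    #
    #     from sqlglot import parse_one
    #
    #     ast = parse_one("CREATE VIEW v WITH SCHEMABINDING AS SELECT 1", read="tsql")
    #     # The attribute surfaces as exp.ViewAttributeProperty under the Create
    #     # node's "properties" arg.
    #     print(ast.args["properties"].expressions)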
def _parse_procedure_option(self) -> exp.Expression | None: 2228 if self._match_text_seq("EXECUTE", "AS"): 2229 return self.expression( 2230 exp.ExecuteAsProperty, 2231 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2232 or self._parse_string(), 2233 ) 2234 2235 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2236 2237 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2238 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2239 self._match(TokenType.EQ) 2240 2241 user = self._parse_id_var() 2242 self._match(TokenType.PARAMETER) 2243 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2244 2245 if not user or not host: 2246 return None 2247 2248 return exp.DefinerProperty(this=f"{user}@{host}") 2249 2250 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2251 self._match(TokenType.TABLE) 2252 self._match(TokenType.EQ) 2253 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2254 2255 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2256 return self.expression(exp.LogProperty, no=no) 2257 2258 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2259 return self.expression(exp.JournalProperty, **kwargs) 2260 2261 def _parse_checksum(self) -> exp.ChecksumProperty: 2262 self._match(TokenType.EQ) 2263 2264 on = None 2265 if self._match(TokenType.ON): 2266 on = True 2267 elif self._match_text_seq("OFF"): 2268 on = False 2269 2270 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2271 2272 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2273 return self.expression( 2274 exp.Cluster, 2275 expressions=( 2276 self._parse_wrapped_csv(self._parse_ordered) 2277 if wrapped 2278 else self._parse_csv(self._parse_ordered) 2279 ), 2280 ) 2281 2282 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2283 self._match_text_seq("BY") 2284 2285 self._match_l_paren() 2286 expressions = self._parse_csv(self._parse_column) 2287 self._match_r_paren() 2288 2289 if self._match_text_seq("SORTED", "BY"): 2290 self._match_l_paren() 2291 sorted_by = self._parse_csv(self._parse_ordered) 2292 self._match_r_paren() 2293 else: 2294 sorted_by = None 2295 2296 self._match(TokenType.INTO) 2297 buckets = self._parse_number() 2298 self._match_text_seq("BUCKETS") 2299 2300 return self.expression( 2301 exp.ClusteredByProperty, 2302 expressions=expressions, 2303 sorted_by=sorted_by, 2304 buckets=buckets, 2305 ) 2306 2307 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2308 if not self._match_text_seq("GRANTS"): 2309 self._retreat(self._index - 1) 2310 return None 2311 2312 return self.expression(exp.CopyGrantsProperty) 2313 2314 def _parse_freespace(self) -> exp.FreespaceProperty: 2315 self._match(TokenType.EQ) 2316 return self.expression( 2317 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2318 ) 2319 2320 def _parse_mergeblockratio( 2321 self, no: bool = False, default: bool = False 2322 ) -> exp.MergeBlockRatioProperty: 2323 if self._match(TokenType.EQ): 2324 return self.expression( 2325 exp.MergeBlockRatioProperty, 2326 this=self._parse_number(), 2327 percent=self._match(TokenType.PERCENT), 2328 ) 2329 2330 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2331 2332 def _parse_datablocksize( 2333 self, 2334 default: t.Optional[bool] = None, 2335 minimum: t.Optional[bool] = None, 2336 maximum: t.Optional[bool] = None, 2337 
) -> exp.DataBlocksizeProperty: 2338 self._match(TokenType.EQ) 2339 size = self._parse_number() 2340 2341 units = None 2342 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2343 units = self._prev.text 2344 2345 return self.expression( 2346 exp.DataBlocksizeProperty, 2347 size=size, 2348 units=units, 2349 default=default, 2350 minimum=minimum, 2351 maximum=maximum, 2352 ) 2353 2354 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2355 self._match(TokenType.EQ) 2356 always = self._match_text_seq("ALWAYS") 2357 manual = self._match_text_seq("MANUAL") 2358 never = self._match_text_seq("NEVER") 2359 default = self._match_text_seq("DEFAULT") 2360 2361 autotemp = None 2362 if self._match_text_seq("AUTOTEMP"): 2363 autotemp = self._parse_schema() 2364 2365 return self.expression( 2366 exp.BlockCompressionProperty, 2367 always=always, 2368 manual=manual, 2369 never=never, 2370 default=default, 2371 autotemp=autotemp, 2372 ) 2373 2374 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2375 index = self._index 2376 no = self._match_text_seq("NO") 2377 concurrent = self._match_text_seq("CONCURRENT") 2378 2379 if not self._match_text_seq("ISOLATED", "LOADING"): 2380 self._retreat(index) 2381 return None 2382 2383 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2384 return self.expression( 2385 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2386 ) 2387 2388 def _parse_locking(self) -> exp.LockingProperty: 2389 if self._match(TokenType.TABLE): 2390 kind = "TABLE" 2391 elif self._match(TokenType.VIEW): 2392 kind = "VIEW" 2393 elif self._match(TokenType.ROW): 2394 kind = "ROW" 2395 elif self._match_text_seq("DATABASE"): 2396 kind = "DATABASE" 2397 else: 2398 kind = None 2399 2400 if kind in ("DATABASE", "TABLE", "VIEW"): 2401 this = self._parse_table_parts() 2402 else: 2403 this = None 2404 2405 if self._match(TokenType.FOR): 2406 for_or_in = "FOR" 2407 elif self._match(TokenType.IN): 2408 for_or_in = "IN" 2409 else: 2410 for_or_in = None 2411 2412 if self._match_text_seq("ACCESS"): 2413 lock_type = "ACCESS" 2414 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2415 lock_type = "EXCLUSIVE" 2416 elif self._match_text_seq("SHARE"): 2417 lock_type = "SHARE" 2418 elif self._match_text_seq("READ"): 2419 lock_type = "READ" 2420 elif self._match_text_seq("WRITE"): 2421 lock_type = "WRITE" 2422 elif self._match_text_seq("CHECKSUM"): 2423 lock_type = "CHECKSUM" 2424 else: 2425 lock_type = None 2426 2427 override = self._match_text_seq("OVERRIDE") 2428 2429 return self.expression( 2430 exp.LockingProperty, 2431 this=this, 2432 kind=kind, 2433 for_or_in=for_or_in, 2434 lock_type=lock_type, 2435 override=override, 2436 ) 2437 2438 def _parse_partition_by(self) -> t.List[exp.Expression]: 2439 if self._match(TokenType.PARTITION_BY): 2440 return self._parse_csv(self._parse_assignment) 2441 return [] 2442 2443 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2444 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2445 if self._match_text_seq("MINVALUE"): 2446 return exp.var("MINVALUE") 2447 if self._match_text_seq("MAXVALUE"): 2448 return exp.var("MAXVALUE") 2449 return self._parse_bitwise() 2450 2451 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2452 expression = None 2453 from_expressions = None 2454 to_expressions = None 2455 2456 if self._match(TokenType.IN): 2457 this = self._parse_wrapped_csv(self._parse_bitwise) 2458 elif 
self._match(TokenType.FROM): 2459 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2460 self._match_text_seq("TO") 2461 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2462 elif self._match_text_seq("WITH", "(", "MODULUS"): 2463 this = self._parse_number() 2464 self._match_text_seq(",", "REMAINDER") 2465 expression = self._parse_number() 2466 self._match_r_paren() 2467 else: 2468 self.raise_error("Failed to parse partition bound spec.") 2469 2470 return self.expression( 2471 exp.PartitionBoundSpec, 2472 this=this, 2473 expression=expression, 2474 from_expressions=from_expressions, 2475 to_expressions=to_expressions, 2476 ) 2477 2478 # https://www.postgresql.org/docs/current/sql-createtable.html 2479 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2480 if not self._match_text_seq("OF"): 2481 self._retreat(self._index - 1) 2482 return None 2483 2484 this = self._parse_table(schema=True) 2485 2486 if self._match(TokenType.DEFAULT): 2487 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2488 elif self._match_text_seq("FOR", "VALUES"): 2489 expression = self._parse_partition_bound_spec() 2490 else: 2491 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2492 2493 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2494 2495 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2496 self._match(TokenType.EQ) 2497 return self.expression( 2498 exp.PartitionedByProperty, 2499 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2500 ) 2501 2502 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2503 if self._match_text_seq("AND", "STATISTICS"): 2504 statistics = True 2505 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2506 statistics = False 2507 else: 2508 statistics = None 2509 2510 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2511 2512 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2513 if self._match_text_seq("SQL"): 2514 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2515 return None 2516 2517 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2518 if self._match_text_seq("SQL", "DATA"): 2519 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2520 return None 2521 2522 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2523 if self._match_text_seq("PRIMARY", "INDEX"): 2524 return exp.NoPrimaryIndexProperty() 2525 if self._match_text_seq("SQL"): 2526 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2527 return None 2528 2529 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2530 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2531 return exp.OnCommitProperty() 2532 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2533 return exp.OnCommitProperty(delete=True) 2534 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2535 2536 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2537 if self._match_text_seq("SQL", "DATA"): 2538 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2539 return None 2540 2541 def _parse_distkey(self) -> exp.DistKeyProperty: 2542 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2543 2544 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2545 table = self._parse_table(schema=True) 2546 2547 
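    # The loop below collects Postgres-style LIKE options, e.g.
    # CREATE TABLE t (LIKE src INCLUDING DEFAULTS EXCLUDING INDEXES), turning each
    # INCLUDING/EXCLUDING pair into an exp.Property on the exp.LikeProperty node.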
options = [] 2548 while self._match_texts(("INCLUDING", "EXCLUDING")): 2549 this = self._prev.text.upper() 2550 2551 id_var = self._parse_id_var() 2552 if not id_var: 2553 return None 2554 2555 options.append( 2556 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2557 ) 2558 2559 return self.expression(exp.LikeProperty, this=table, expressions=options) 2560 2561 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2562 return self.expression( 2563 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2564 ) 2565 2566 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2567 self._match(TokenType.EQ) 2568 return self.expression( 2569 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2570 ) 2571 2572 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2573 self._match_text_seq("WITH", "CONNECTION") 2574 return self.expression( 2575 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2576 ) 2577 2578 def _parse_returns(self) -> exp.ReturnsProperty: 2579 value: t.Optional[exp.Expression] 2580 null = None 2581 is_table = self._match(TokenType.TABLE) 2582 2583 if is_table: 2584 if self._match(TokenType.LT): 2585 value = self.expression( 2586 exp.Schema, 2587 this="TABLE", 2588 expressions=self._parse_csv(self._parse_struct_types), 2589 ) 2590 if not self._match(TokenType.GT): 2591 self.raise_error("Expecting >") 2592 else: 2593 value = self._parse_schema(exp.var("TABLE")) 2594 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2595 null = True 2596 value = None 2597 else: 2598 value = self._parse_types() 2599 2600 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2601 2602 def _parse_describe(self) -> exp.Describe: 2603 kind = self._match_set(self.CREATABLES) and self._prev.text 2604 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2605 if self._match(TokenType.DOT): 2606 style = None 2607 self._retreat(self._index - 2) 2608 this = self._parse_table(schema=True) 2609 properties = self._parse_properties() 2610 expressions = properties.expressions if properties else None 2611 partition = self._parse_partition() 2612 return self.expression( 2613 exp.Describe, 2614 this=this, 2615 style=style, 2616 kind=kind, 2617 expressions=expressions, 2618 partition=partition, 2619 ) 2620 2621 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2622 kind = self._prev.text.upper() 2623 expressions = [] 2624 2625 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2626 if self._match(TokenType.WHEN): 2627 expression = self._parse_disjunction() 2628 self._match(TokenType.THEN) 2629 else: 2630 expression = None 2631 2632 else_ = self._match(TokenType.ELSE) 2633 2634 if not self._match(TokenType.INTO): 2635 return None 2636 2637 return self.expression( 2638 exp.ConditionalInsert, 2639 this=self.expression( 2640 exp.Insert, 2641 this=self._parse_table(schema=True), 2642 expression=self._parse_derived_table_values(), 2643 ), 2644 expression=expression, 2645 else_=else_, 2646 ) 2647 2648 expression = parse_conditional_insert() 2649 while expression is not None: 2650 expressions.append(expression) 2651 expression = parse_conditional_insert() 2652 2653 return self.expression( 2654 exp.MultitableInserts, 2655 kind=kind, 2656 comments=comments, 2657 expressions=expressions, 2658 source=self._parse_table(), 2659 ) 2660 2661 
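    # Illustrative usage sketch (an assumption, not code from the module): Oracle's
    # multitable inserts reach _parse_multitable_inserts from _parse_insert once
    # INSERT is followed by FIRST or ALL, so a statement like the following should
    # produce an exp.MultitableInserts node.
    #
    #     from sqlglot import parse_one
    #
    #     sql = """
    #         INSERT ALL
    #             WHEN c > 10 THEN INTO big VALUES (c)
    #             ELSE INTO small VALUES (c)
    #         SELECT c FROM src
    #     """
    #     ast = parse_one(sql, read="oracle")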
def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2662 comments = ensure_list(self._prev_comments) 2663 hint = self._parse_hint() 2664 overwrite = self._match(TokenType.OVERWRITE) 2665 ignore = self._match(TokenType.IGNORE) 2666 local = self._match_text_seq("LOCAL") 2667 alternative = None 2668 is_function = None 2669 2670 if self._match_text_seq("DIRECTORY"): 2671 this: t.Optional[exp.Expression] = self.expression( 2672 exp.Directory, 2673 this=self._parse_var_or_string(), 2674 local=local, 2675 row_format=self._parse_row_format(match_row=True), 2676 ) 2677 else: 2678 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2679 comments += ensure_list(self._prev_comments) 2680 return self._parse_multitable_inserts(comments) 2681 2682 if self._match(TokenType.OR): 2683 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2684 2685 self._match(TokenType.INTO) 2686 comments += ensure_list(self._prev_comments) 2687 self._match(TokenType.TABLE) 2688 is_function = self._match(TokenType.FUNCTION) 2689 2690 this = ( 2691 self._parse_table(schema=True, parse_partition=True) 2692 if not is_function 2693 else self._parse_function() 2694 ) 2695 2696 returning = self._parse_returning() 2697 2698 return self.expression( 2699 exp.Insert, 2700 comments=comments, 2701 hint=hint, 2702 is_function=is_function, 2703 this=this, 2704 stored=self._match_text_seq("STORED") and self._parse_stored(), 2705 by_name=self._match_text_seq("BY", "NAME"), 2706 exists=self._parse_exists(), 2707 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2708 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2709 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2710 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2711 conflict=self._parse_on_conflict(), 2712 returning=returning or self._parse_returning(), 2713 overwrite=overwrite, 2714 alternative=alternative, 2715 ignore=ignore, 2716 source=self._match(TokenType.TABLE) and self._parse_table(), 2717 ) 2718 2719 def _parse_kill(self) -> exp.Kill: 2720 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2721 2722 return self.expression( 2723 exp.Kill, 2724 this=self._parse_primary(), 2725 kind=kind, 2726 ) 2727 2728 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2729 conflict = self._match_text_seq("ON", "CONFLICT") 2730 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2731 2732 if not conflict and not duplicate: 2733 return None 2734 2735 conflict_keys = None 2736 constraint = None 2737 2738 if conflict: 2739 if self._match_text_seq("ON", "CONSTRAINT"): 2740 constraint = self._parse_id_var() 2741 elif self._match(TokenType.L_PAREN): 2742 conflict_keys = self._parse_csv(self._parse_id_var) 2743 self._match_r_paren() 2744 2745 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2746 if self._prev.token_type == TokenType.UPDATE: 2747 self._match(TokenType.SET) 2748 expressions = self._parse_csv(self._parse_equality) 2749 else: 2750 expressions = None 2751 2752 return self.expression( 2753 exp.OnConflict, 2754 duplicate=duplicate, 2755 expressions=expressions, 2756 action=action, 2757 conflict_keys=conflict_keys, 2758 constraint=constraint, 2759 ) 2760 2761 def _parse_returning(self) -> t.Optional[exp.Returning]: 2762 if not self._match(TokenType.RETURNING): 2763 return None 2764 return self.expression( 2765 exp.Returning, 2766 
expressions=self._parse_csv(self._parse_expression), 2767 into=self._match(TokenType.INTO) and self._parse_table_part(), 2768 ) 2769 2770 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2771 if not self._match(TokenType.FORMAT): 2772 return None 2773 return self._parse_row_format() 2774 2775 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2776 index = self._index 2777 with_ = with_ or self._match_text_seq("WITH") 2778 2779 if not self._match(TokenType.SERDE_PROPERTIES): 2780 self._retreat(index) 2781 return None 2782 return self.expression( 2783 exp.SerdeProperties, 2784 **{ # type: ignore 2785 "expressions": self._parse_wrapped_properties(), 2786 "with": with_, 2787 }, 2788 ) 2789 2790 def _parse_row_format( 2791 self, match_row: bool = False 2792 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2793 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2794 return None 2795 2796 if self._match_text_seq("SERDE"): 2797 this = self._parse_string() 2798 2799 serde_properties = self._parse_serde_properties() 2800 2801 return self.expression( 2802 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2803 ) 2804 2805 self._match_text_seq("DELIMITED") 2806 2807 kwargs = {} 2808 2809 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2810 kwargs["fields"] = self._parse_string() 2811 if self._match_text_seq("ESCAPED", "BY"): 2812 kwargs["escaped"] = self._parse_string() 2813 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2814 kwargs["collection_items"] = self._parse_string() 2815 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2816 kwargs["map_keys"] = self._parse_string() 2817 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2818 kwargs["lines"] = self._parse_string() 2819 if self._match_text_seq("NULL", "DEFINED", "AS"): 2820 kwargs["null"] = self._parse_string() 2821 2822 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2823 2824 def _parse_load(self) -> exp.LoadData | exp.Command: 2825 if self._match_text_seq("DATA"): 2826 local = self._match_text_seq("LOCAL") 2827 self._match_text_seq("INPATH") 2828 inpath = self._parse_string() 2829 overwrite = self._match(TokenType.OVERWRITE) 2830 self._match_pair(TokenType.INTO, TokenType.TABLE) 2831 2832 return self.expression( 2833 exp.LoadData, 2834 this=self._parse_table(schema=True), 2835 local=local, 2836 overwrite=overwrite, 2837 inpath=inpath, 2838 partition=self._parse_partition(), 2839 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2840 serde=self._match_text_seq("SERDE") and self._parse_string(), 2841 ) 2842 return self._parse_as_command(self._prev) 2843 2844 def _parse_delete(self) -> exp.Delete: 2845 # This handles MySQL's "Multiple-Table Syntax" 2846 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2847 tables = None 2848 comments = self._prev_comments 2849 if not self._match(TokenType.FROM, advance=False): 2850 tables = self._parse_csv(self._parse_table) or None 2851 2852 returning = self._parse_returning() 2853 2854 return self.expression( 2855 exp.Delete, 2856 comments=comments, 2857 tables=tables, 2858 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2859 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2860 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2861 where=self._parse_where(), 2862 returning=returning or 
self._parse_returning(), 2863 limit=self._parse_limit(), 2864 ) 2865 2866 def _parse_update(self) -> exp.Update: 2867 comments = self._prev_comments 2868 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2869 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2870 returning = self._parse_returning() 2871 return self.expression( 2872 exp.Update, 2873 comments=comments, 2874 **{ # type: ignore 2875 "this": this, 2876 "expressions": expressions, 2877 "from": self._parse_from(joins=True), 2878 "where": self._parse_where(), 2879 "returning": returning or self._parse_returning(), 2880 "order": self._parse_order(), 2881 "limit": self._parse_limit(), 2882 }, 2883 ) 2884 2885 def _parse_uncache(self) -> exp.Uncache: 2886 if not self._match(TokenType.TABLE): 2887 self.raise_error("Expecting TABLE after UNCACHE") 2888 2889 return self.expression( 2890 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2891 ) 2892 2893 def _parse_cache(self) -> exp.Cache: 2894 lazy = self._match_text_seq("LAZY") 2895 self._match(TokenType.TABLE) 2896 table = self._parse_table(schema=True) 2897 2898 options = [] 2899 if self._match_text_seq("OPTIONS"): 2900 self._match_l_paren() 2901 k = self._parse_string() 2902 self._match(TokenType.EQ) 2903 v = self._parse_string() 2904 options = [k, v] 2905 self._match_r_paren() 2906 2907 self._match(TokenType.ALIAS) 2908 return self.expression( 2909 exp.Cache, 2910 this=table, 2911 lazy=lazy, 2912 options=options, 2913 expression=self._parse_select(nested=True), 2914 ) 2915 2916 def _parse_partition(self) -> t.Optional[exp.Partition]: 2917 if not self._match(TokenType.PARTITION): 2918 return None 2919 2920 return self.expression( 2921 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2922 ) 2923 2924 def _parse_value(self) -> t.Optional[exp.Tuple]: 2925 if self._match(TokenType.L_PAREN): 2926 expressions = self._parse_csv(self._parse_expression) 2927 self._match_r_paren() 2928 return self.expression(exp.Tuple, expressions=expressions) 2929 2930 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
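    # Each bare expression is wrapped in its own single-element exp.Tuple below,
    # so VALUES 1, 2 becomes two one-column rows rather than one two-column row.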
2931 expression = self._parse_expression() 2932 if expression: 2933 return self.expression(exp.Tuple, expressions=[expression]) 2934 return None 2935 2936 def _parse_projections(self) -> t.List[exp.Expression]: 2937 return self._parse_expressions() 2938 2939 def _parse_select( 2940 self, 2941 nested: bool = False, 2942 table: bool = False, 2943 parse_subquery_alias: bool = True, 2944 parse_set_operation: bool = True, 2945 ) -> t.Optional[exp.Expression]: 2946 cte = self._parse_with() 2947 2948 if cte: 2949 this = self._parse_statement() 2950 2951 if not this: 2952 self.raise_error("Failed to parse any statement following CTE") 2953 return cte 2954 2955 if "with" in this.arg_types: 2956 this.set("with", cte) 2957 else: 2958 self.raise_error(f"{this.key} does not support CTE") 2959 this = cte 2960 2961 return this 2962 2963 # duckdb supports leading with FROM x 2964 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2965 2966 if self._match(TokenType.SELECT): 2967 comments = self._prev_comments 2968 2969 hint = self._parse_hint() 2970 2971 if self._next and not self._next.token_type == TokenType.DOT: 2972 all_ = self._match(TokenType.ALL) 2973 distinct = self._match_set(self.DISTINCT_TOKENS) 2974 else: 2975 all_, distinct = None, None 2976 2977 kind = ( 2978 self._match(TokenType.ALIAS) 2979 and self._match_texts(("STRUCT", "VALUE")) 2980 and self._prev.text.upper() 2981 ) 2982 2983 if distinct: 2984 distinct = self.expression( 2985 exp.Distinct, 2986 on=self._parse_value() if self._match(TokenType.ON) else None, 2987 ) 2988 2989 if all_ and distinct: 2990 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2991 2992 operation_modifiers = [] 2993 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2994 operation_modifiers.append(exp.var(self._prev.text.upper())) 2995 2996 limit = self._parse_limit(top=True) 2997 projections = self._parse_projections() 2998 2999 this = self.expression( 3000 exp.Select, 3001 kind=kind, 3002 hint=hint, 3003 distinct=distinct, 3004 expressions=projections, 3005 limit=limit, 3006 operation_modifiers=operation_modifiers or None, 3007 ) 3008 this.comments = comments 3009 3010 into = self._parse_into() 3011 if into: 3012 this.set("into", into) 3013 3014 if not from_: 3015 from_ = self._parse_from() 3016 3017 if from_: 3018 this.set("from", from_) 3019 3020 this = self._parse_query_modifiers(this) 3021 elif (table or nested) and self._match(TokenType.L_PAREN): 3022 if self._match(TokenType.PIVOT): 3023 this = self._parse_simplified_pivot() 3024 elif self._match(TokenType.FROM): 3025 this = exp.select("*").from_( 3026 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3027 ) 3028 else: 3029 this = ( 3030 self._parse_table() 3031 if table 3032 else self._parse_select(nested=True, parse_set_operation=False) 3033 ) 3034 3035 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3036 # in case a modifier (e.g. 
join) is following 3037 if table and isinstance(this, exp.Values) and this.alias: 3038 alias = this.args["alias"].pop() 3039 this = exp.Table(this=this, alias=alias) 3040 3041 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3042 3043 self._match_r_paren() 3044 3045 # We return early here so that the UNION isn't attached to the subquery by the 3046 # following call to _parse_set_operations, but instead becomes the parent node 3047 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3048 elif self._match(TokenType.VALUES, advance=False): 3049 this = self._parse_derived_table_values() 3050 elif from_: 3051 this = exp.select("*").from_(from_.this, copy=False) 3052 elif self._match(TokenType.SUMMARIZE): 3053 table = self._match(TokenType.TABLE) 3054 this = self._parse_select() or self._parse_string() or self._parse_table() 3055 return self.expression(exp.Summarize, this=this, table=table) 3056 elif self._match(TokenType.DESCRIBE): 3057 this = self._parse_describe() 3058 elif self._match_text_seq("STREAM"): 3059 this = self._parse_function() 3060 if this: 3061 this = self.expression(exp.Stream, this=this) 3062 else: 3063 self._retreat(self._index - 1) 3064 else: 3065 this = None 3066 3067 return self._parse_set_operations(this) if parse_set_operation else this 3068 3069 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3070 if not skip_with_token and not self._match(TokenType.WITH): 3071 return None 3072 3073 comments = self._prev_comments 3074 recursive = self._match(TokenType.RECURSIVE) 3075 3076 last_comments = None 3077 expressions = [] 3078 while True: 3079 expressions.append(self._parse_cte()) 3080 if last_comments: 3081 expressions[-1].add_comments(last_comments) 3082 3083 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3084 break 3085 else: 3086 self._match(TokenType.WITH) 3087 3088 last_comments = self._prev_comments 3089 3090 return self.expression( 3091 exp.With, comments=comments, expressions=expressions, recursive=recursive 3092 ) 3093 3094 def _parse_cte(self) -> exp.CTE: 3095 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3096 if not alias or not alias.this: 3097 self.raise_error("Expected CTE to have alias") 3098 3099 self._match(TokenType.ALIAS) 3100 comments = self._prev_comments 3101 3102 if self._match_text_seq("NOT", "MATERIALIZED"): 3103 materialized = False 3104 elif self._match_text_seq("MATERIALIZED"): 3105 materialized = True 3106 else: 3107 materialized = None 3108 3109 return self.expression( 3110 exp.CTE, 3111 this=self._parse_wrapped(self._parse_statement), 3112 alias=alias, 3113 materialized=materialized, 3114 comments=comments, 3115 ) 3116 3117 def _parse_table_alias( 3118 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3119 ) -> t.Optional[exp.TableAlias]: 3120 any_token = self._match(TokenType.ALIAS) 3121 alias = ( 3122 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3123 or self._parse_string_as_identifier() 3124 ) 3125 3126 index = self._index 3127 if self._match(TokenType.L_PAREN): 3128 columns = self._parse_csv(self._parse_function_parameter) 3129 self._match_r_paren() if columns else self._retreat(index) 3130 else: 3131 columns = None 3132 3133 if not alias and not columns: 3134 return None 3135 3136 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3137 3138 # We bubble up comments from the Identifier to the TableAlias 3139 if isinstance(alias, exp.Identifier): 3140 
table_alias.add_comments(alias.pop_comments()) 3141 3142 return table_alias 3143 3144 def _parse_subquery( 3145 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3146 ) -> t.Optional[exp.Subquery]: 3147 if not this: 3148 return None 3149 3150 return self.expression( 3151 exp.Subquery, 3152 this=this, 3153 pivots=self._parse_pivots(), 3154 alias=self._parse_table_alias() if parse_alias else None, 3155 sample=self._parse_table_sample(), 3156 ) 3157 3158 def _implicit_unnests_to_explicit(self, this: E) -> E: 3159 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3160 3161 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3162 for i, join in enumerate(this.args.get("joins") or []): 3163 table = join.this 3164 normalized_table = table.copy() 3165 normalized_table.meta["maybe_column"] = True 3166 normalized_table = _norm(normalized_table, dialect=self.dialect) 3167 3168 if isinstance(table, exp.Table) and not join.args.get("on"): 3169 if normalized_table.parts[0].name in refs: 3170 table_as_column = table.to_column() 3171 unnest = exp.Unnest(expressions=[table_as_column]) 3172 3173 # Table.to_column creates a parent Alias node that we want to convert to 3174 # a TableAlias and attach to the Unnest, so it matches the parser's output 3175 if isinstance(table.args.get("alias"), exp.TableAlias): 3176 table_as_column.replace(table_as_column.this) 3177 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3178 3179 table.replace(unnest) 3180 3181 refs.add(normalized_table.alias_or_name) 3182 3183 return this 3184 3185 def _parse_query_modifiers( 3186 self, this: t.Optional[exp.Expression] 3187 ) -> t.Optional[exp.Expression]: 3188 if isinstance(this, (exp.Query, exp.Table)): 3189 for join in self._parse_joins(): 3190 this.append("joins", join) 3191 for lateral in iter(self._parse_lateral, None): 3192 this.append("laterals", lateral) 3193 3194 while True: 3195 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3196 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3197 key, expression = parser(self) 3198 3199 if expression: 3200 this.set(key, expression) 3201 if key == "limit": 3202 offset = expression.args.pop("offset", None) 3203 3204 if offset: 3205 offset = exp.Offset(expression=offset) 3206 this.set("offset", offset) 3207 3208 limit_by_expressions = expression.expressions 3209 expression.set("expressions", None) 3210 offset.set("expressions", limit_by_expressions) 3211 continue 3212 break 3213 3214 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3215 this = self._implicit_unnests_to_explicit(this) 3216 3217 return this 3218 3219 def _parse_hint(self) -> t.Optional[exp.Hint]: 3220 if self._match(TokenType.HINT): 3221 hints = [] 3222 for hint in iter( 3223 lambda: self._parse_csv( 3224 lambda: self._parse_function() or self._parse_var(upper=True) 3225 ), 3226 [], 3227 ): 3228 hints.extend(hint) 3229 3230 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3231 self.raise_error("Expected */ after HINT") 3232 3233 return self.expression(exp.Hint, expressions=hints) 3234 3235 return None 3236 3237 def _parse_into(self) -> t.Optional[exp.Into]: 3238 if not self._match(TokenType.INTO): 3239 return None 3240 3241 temp = self._match(TokenType.TEMPORARY) 3242 unlogged = self._match_text_seq("UNLOGGED") 3243 self._match(TokenType.TABLE) 3244 3245 return self.expression( 3246 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3247 ) 
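    # Illustrative usage sketch (an assumption, not code from the module): the
    # Postgres form of SELECT INTO exercises the TEMPORARY/UNLOGGED flags above.
    #
    #     from sqlglot import parse_one
    #
    #     ast = parse_one("SELECT * INTO TEMPORARY TABLE t2 FROM t1", read="postgres")
    #     ast.args["into"]  # exp.Into(..., temporary=True) attached to the Select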
3248 3249 def _parse_from( 3250 self, joins: bool = False, skip_from_token: bool = False 3251 ) -> t.Optional[exp.From]: 3252 if not skip_from_token and not self._match(TokenType.FROM): 3253 return None 3254 3255 return self.expression( 3256 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3257 ) 3258 3259 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3260 return self.expression( 3261 exp.MatchRecognizeMeasure, 3262 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3263 this=self._parse_expression(), 3264 ) 3265 3266 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3267 if not self._match(TokenType.MATCH_RECOGNIZE): 3268 return None 3269 3270 self._match_l_paren() 3271 3272 partition = self._parse_partition_by() 3273 order = self._parse_order() 3274 3275 measures = ( 3276 self._parse_csv(self._parse_match_recognize_measure) 3277 if self._match_text_seq("MEASURES") 3278 else None 3279 ) 3280 3281 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3282 rows = exp.var("ONE ROW PER MATCH") 3283 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3284 text = "ALL ROWS PER MATCH" 3285 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3286 text += " SHOW EMPTY MATCHES" 3287 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3288 text += " OMIT EMPTY MATCHES" 3289 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3290 text += " WITH UNMATCHED ROWS" 3291 rows = exp.var(text) 3292 else: 3293 rows = None 3294 3295 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3296 text = "AFTER MATCH SKIP" 3297 if self._match_text_seq("PAST", "LAST", "ROW"): 3298 text += " PAST LAST ROW" 3299 elif self._match_text_seq("TO", "NEXT", "ROW"): 3300 text += " TO NEXT ROW" 3301 elif self._match_text_seq("TO", "FIRST"): 3302 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3303 elif self._match_text_seq("TO", "LAST"): 3304 text += f" TO LAST {self._advance_any().text}" # type: ignore 3305 after = exp.var(text) 3306 else: 3307 after = None 3308 3309 if self._match_text_seq("PATTERN"): 3310 self._match_l_paren() 3311 3312 if not self._curr: 3313 self.raise_error("Expecting )", self._curr) 3314 3315 paren = 1 3316 start = self._curr 3317 3318 while self._curr and paren > 0: 3319 if self._curr.token_type == TokenType.L_PAREN: 3320 paren += 1 3321 if self._curr.token_type == TokenType.R_PAREN: 3322 paren -= 1 3323 3324 end = self._prev 3325 self._advance() 3326 3327 if paren > 0: 3328 self.raise_error("Expecting )", self._curr) 3329 3330 pattern = exp.var(self._find_sql(start, end)) 3331 else: 3332 pattern = None 3333 3334 define = ( 3335 self._parse_csv(self._parse_name_as_expression) 3336 if self._match_text_seq("DEFINE") 3337 else None 3338 ) 3339 3340 self._match_r_paren() 3341 3342 return self.expression( 3343 exp.MatchRecognize, 3344 partition_by=partition, 3345 order=order, 3346 measures=measures, 3347 rows=rows, 3348 after=after, 3349 pattern=pattern, 3350 define=define, 3351 alias=self._parse_table_alias(), 3352 ) 3353 3354 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3355 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3356 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3357 cross_apply = False 3358 3359 if cross_apply is not None: 3360 this = self._parse_select(table=True) 3361 view = None 3362 outer = None 3363 elif self._match(TokenType.LATERAL): 3364 this = self._parse_select(table=True) 3365 view = 
self._match(TokenType.VIEW) 3366 outer = self._match(TokenType.OUTER) 3367 else: 3368 return None 3369 3370 if not this: 3371 this = ( 3372 self._parse_unnest() 3373 or self._parse_function() 3374 or self._parse_id_var(any_token=False) 3375 ) 3376 3377 while self._match(TokenType.DOT): 3378 this = exp.Dot( 3379 this=this, 3380 expression=self._parse_function() or self._parse_id_var(any_token=False), 3381 ) 3382 3383 if view: 3384 table = self._parse_id_var(any_token=False) 3385 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3386 table_alias: t.Optional[exp.TableAlias] = self.expression( 3387 exp.TableAlias, this=table, columns=columns 3388 ) 3389 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3390 # We move the alias from the lateral's child node to the lateral itself 3391 table_alias = this.args["alias"].pop() 3392 else: 3393 table_alias = self._parse_table_alias() 3394 3395 return self.expression( 3396 exp.Lateral, 3397 this=this, 3398 view=view, 3399 outer=outer, 3400 alias=table_alias, 3401 cross_apply=cross_apply, 3402 ) 3403 3404 def _parse_join_parts( 3405 self, 3406 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3407 return ( 3408 self._match_set(self.JOIN_METHODS) and self._prev, 3409 self._match_set(self.JOIN_SIDES) and self._prev, 3410 self._match_set(self.JOIN_KINDS) and self._prev, 3411 ) 3412 3413 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3414 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3415 this = self._parse_column() 3416 if isinstance(this, exp.Column): 3417 return this.this 3418 return this 3419 3420 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3421 3422 def _parse_join( 3423 self, skip_join_token: bool = False, parse_bracket: bool = False 3424 ) -> t.Optional[exp.Join]: 3425 if self._match(TokenType.COMMA): 3426 return self.expression(exp.Join, this=self._parse_table()) 3427 3428 index = self._index 3429 method, side, kind = self._parse_join_parts() 3430 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3431 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3432 3433 if not skip_join_token and not join: 3434 self._retreat(index) 3435 kind = None 3436 method = None 3437 side = None 3438 3439 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3440 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3441 3442 if not skip_join_token and not join and not outer_apply and not cross_apply: 3443 return None 3444 3445 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3446 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3447 kwargs["expressions"] = self._parse_csv( 3448 lambda: self._parse_table(parse_bracket=parse_bracket) 3449 ) 3450 3451 if method: 3452 kwargs["method"] = method.text 3453 if side: 3454 kwargs["side"] = side.text 3455 if kind: 3456 kwargs["kind"] = kind.text 3457 if hint: 3458 kwargs["hint"] = hint 3459 3460 if self._match(TokenType.MATCH_CONDITION): 3461 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3462 3463 if self._match(TokenType.ON): 3464 kwargs["on"] = self._parse_assignment() 3465 elif self._match(TokenType.USING): 3466 kwargs["using"] = self._parse_using_identifiers() 3467 elif ( 3468 not (outer_apply or cross_apply) 3469 and not isinstance(kwargs["this"], exp.Unnest) 3470 and not (kind and kind.token_type in 
(TokenType.CROSS, TokenType.ARRAY)) 3471 ): 3472 index = self._index 3473 joins: t.Optional[list] = list(self._parse_joins()) 3474 3475 if joins and self._match(TokenType.ON): 3476 kwargs["on"] = self._parse_assignment() 3477 elif joins and self._match(TokenType.USING): 3478 kwargs["using"] = self._parse_using_identifiers() 3479 else: 3480 joins = None 3481 self._retreat(index) 3482 3483 kwargs["this"].set("joins", joins if joins else None) 3484 3485 comments = [c for token in (method, side, kind) if token for c in token.comments] 3486 return self.expression(exp.Join, comments=comments, **kwargs) 3487 3488 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3489 this = self._parse_assignment() 3490 3491 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3492 return this 3493 3494 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3495 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3496 3497 return this 3498 3499 def _parse_index_params(self) -> exp.IndexParameters: 3500 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3501 3502 if self._match(TokenType.L_PAREN, advance=False): 3503 columns = self._parse_wrapped_csv(self._parse_with_operator) 3504 else: 3505 columns = None 3506 3507 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3508 partition_by = self._parse_partition_by() 3509 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3510 tablespace = ( 3511 self._parse_var(any_token=True) 3512 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3513 else None 3514 ) 3515 where = self._parse_where() 3516 3517 on = self._parse_field() if self._match(TokenType.ON) else None 3518 3519 return self.expression( 3520 exp.IndexParameters, 3521 using=using, 3522 columns=columns, 3523 include=include, 3524 partition_by=partition_by, 3525 where=where, 3526 with_storage=with_storage, 3527 tablespace=tablespace, 3528 on=on, 3529 ) 3530 3531 def _parse_index( 3532 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3533 ) -> t.Optional[exp.Index]: 3534 if index or anonymous: 3535 unique = None 3536 primary = None 3537 amp = None 3538 3539 self._match(TokenType.ON) 3540 self._match(TokenType.TABLE) # hive 3541 table = self._parse_table_parts(schema=True) 3542 else: 3543 unique = self._match(TokenType.UNIQUE) 3544 primary = self._match_text_seq("PRIMARY") 3545 amp = self._match_text_seq("AMP") 3546 3547 if not self._match(TokenType.INDEX): 3548 return None 3549 3550 index = self._parse_id_var() 3551 table = None 3552 3553 params = self._parse_index_params() 3554 3555 return self.expression( 3556 exp.Index, 3557 this=index, 3558 table=table, 3559 unique=unique, 3560 primary=primary, 3561 amp=amp, 3562 params=params, 3563 ) 3564 3565 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3566 hints: t.List[exp.Expression] = [] 3567 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3568 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3569 hints.append( 3570 self.expression( 3571 exp.WithTableHint, 3572 expressions=self._parse_csv( 3573 lambda: self._parse_function() or self._parse_var(any_token=True) 3574 ), 3575 ) 3576 ) 3577 self._match_r_paren() 3578 else: 3579 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3580 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3581 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3582 3583 
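# MySQL index hints have the shape USE|IGNORE|FORCE {INDEX|KEY} [FOR {JOIN|ORDER BY|GROUP BY}] (index_list),
# so we consume the INDEX/KEY keyword, the optional FOR target and the wrapped identifier list below.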
self._match_set((TokenType.INDEX, TokenType.KEY)) 3584 if self._match(TokenType.FOR): 3585 hint.set("target", self._advance_any() and self._prev.text.upper()) 3586 3587 hint.set("expressions", self._parse_wrapped_id_vars()) 3588 hints.append(hint) 3589 3590 return hints or None 3591 3592 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3593 return ( 3594 (not schema and self._parse_function(optional_parens=False)) 3595 or self._parse_id_var(any_token=False) 3596 or self._parse_string_as_identifier() 3597 or self._parse_placeholder() 3598 ) 3599 3600 def _parse_table_parts( 3601 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3602 ) -> exp.Table: 3603 catalog = None 3604 db = None 3605 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3606 3607 while self._match(TokenType.DOT): 3608 if catalog: 3609 # This allows nesting the table in arbitrarily many dot expressions if needed 3610 table = self.expression( 3611 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3612 ) 3613 else: 3614 catalog = db 3615 db = table 3616 # "" used for tsql FROM a..b case 3617 table = self._parse_table_part(schema=schema) or "" 3618 3619 if ( 3620 wildcard 3621 and self._is_connected() 3622 and (isinstance(table, exp.Identifier) or not table) 3623 and self._match(TokenType.STAR) 3624 ): 3625 if isinstance(table, exp.Identifier): 3626 table.args["this"] += "*" 3627 else: 3628 table = exp.Identifier(this="*") 3629 3630 # We bubble up comments from the Identifier to the Table 3631 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3632 3633 if is_db_reference: 3634 catalog = db 3635 db = table 3636 table = None 3637 3638 if not table and not is_db_reference: 3639 self.raise_error(f"Expected table name but got {self._curr}") 3640 if not db and is_db_reference: 3641 self.raise_error(f"Expected database name but got {self._curr}") 3642 3643 table = self.expression( 3644 exp.Table, 3645 comments=comments, 3646 this=table, 3647 db=db, 3648 catalog=catalog, 3649 ) 3650 3651 changes = self._parse_changes() 3652 if changes: 3653 table.set("changes", changes) 3654 3655 at_before = self._parse_historical_data() 3656 if at_before: 3657 table.set("when", at_before) 3658 3659 pivots = self._parse_pivots() 3660 if pivots: 3661 table.set("pivots", pivots) 3662 3663 return table 3664 3665 def _parse_table( 3666 self, 3667 schema: bool = False, 3668 joins: bool = False, 3669 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3670 parse_bracket: bool = False, 3671 is_db_reference: bool = False, 3672 parse_partition: bool = False, 3673 ) -> t.Optional[exp.Expression]: 3674 lateral = self._parse_lateral() 3675 if lateral: 3676 return lateral 3677 3678 unnest = self._parse_unnest() 3679 if unnest: 3680 return unnest 3681 3682 values = self._parse_derived_table_values() 3683 if values: 3684 return values 3685 3686 subquery = self._parse_select(table=True) 3687 if subquery: 3688 if not subquery.args.get("pivots"): 3689 subquery.set("pivots", self._parse_pivots()) 3690 return subquery 3691 3692 bracket = parse_bracket and self._parse_bracket(None) 3693 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3694 3695 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3696 self._parse_table 3697 ) 3698 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3699 3700 only = self._match(TokenType.ONLY) 3701 3702 this = 
t.cast( 3703 exp.Expression, 3704 bracket 3705 or rows_from 3706 or self._parse_bracket( 3707 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3708 ), 3709 ) 3710 3711 if only: 3712 this.set("only", only) 3713 3714 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3715 self._match_text_seq("*") 3716 3717 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3718 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3719 this.set("partition", self._parse_partition()) 3720 3721 if schema: 3722 return self._parse_schema(this=this) 3723 3724 version = self._parse_version() 3725 3726 if version: 3727 this.set("version", version) 3728 3729 if self.dialect.ALIAS_POST_TABLESAMPLE: 3730 this.set("sample", self._parse_table_sample()) 3731 3732 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3733 if alias: 3734 this.set("alias", alias) 3735 3736 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3737 return self.expression( 3738 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3739 ) 3740 3741 this.set("hints", self._parse_table_hints()) 3742 3743 if not this.args.get("pivots"): 3744 this.set("pivots", self._parse_pivots()) 3745 3746 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3747 this.set("sample", self._parse_table_sample()) 3748 3749 if joins: 3750 for join in self._parse_joins(): 3751 this.append("joins", join) 3752 3753 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3754 this.set("ordinality", True) 3755 this.set("alias", self._parse_table_alias()) 3756 3757 return this 3758 3759 def _parse_version(self) -> t.Optional[exp.Version]: 3760 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3761 this = "TIMESTAMP" 3762 elif self._match(TokenType.VERSION_SNAPSHOT): 3763 this = "VERSION" 3764 else: 3765 return None 3766 3767 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3768 kind = self._prev.text.upper() 3769 start = self._parse_bitwise() 3770 self._match_texts(("TO", "AND")) 3771 end = self._parse_bitwise() 3772 expression: t.Optional[exp.Expression] = self.expression( 3773 exp.Tuple, expressions=[start, end] 3774 ) 3775 elif self._match_text_seq("CONTAINED", "IN"): 3776 kind = "CONTAINED IN" 3777 expression = self.expression( 3778 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3779 ) 3780 elif self._match(TokenType.ALL): 3781 kind = "ALL" 3782 expression = None 3783 else: 3784 self._match_text_seq("AS", "OF") 3785 kind = "AS OF" 3786 expression = self._parse_type() 3787 3788 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3789 3790 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3791 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3792 index = self._index 3793 historical_data = None 3794 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3795 this = self._prev.text.upper() 3796 kind = ( 3797 self._match(TokenType.L_PAREN) 3798 and self._match_texts(self.HISTORICAL_DATA_KIND) 3799 and self._prev.text.upper() 3800 ) 3801 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3802 3803 if expression: 3804 self._match_r_paren() 3805 historical_data = self.expression( 3806 exp.HistoricalData, this=this, kind=kind, expression=expression 3807 ) 3808 else: 3809 self._retreat(index) 3810 3811 return historical_data 3812 3813 def _parse_changes(self) -> t.Optional[exp.Changes]: 3814 if not 
self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3815 return None 3816 3817 information = self._parse_var(any_token=True) 3818 self._match_r_paren() 3819 3820 return self.expression( 3821 exp.Changes, 3822 information=information, 3823 at_before=self._parse_historical_data(), 3824 end=self._parse_historical_data(), 3825 ) 3826 3827 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3828 if not self._match(TokenType.UNNEST): 3829 return None 3830 3831 expressions = self._parse_wrapped_csv(self._parse_equality) 3832 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3833 3834 alias = self._parse_table_alias() if with_alias else None 3835 3836 if alias: 3837 if self.dialect.UNNEST_COLUMN_ONLY: 3838 if alias.args.get("columns"): 3839 self.raise_error("Unexpected extra column alias in unnest.") 3840 3841 alias.set("columns", [alias.this]) 3842 alias.set("this", None) 3843 3844 columns = alias.args.get("columns") or [] 3845 if offset and len(expressions) < len(columns): 3846 offset = columns.pop() 3847 3848 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3849 self._match(TokenType.ALIAS) 3850 offset = self._parse_id_var( 3851 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3852 ) or exp.to_identifier("offset") 3853 3854 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3855 3856 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3857 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3858 if not is_derived and not ( 3859 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3860 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3861 ): 3862 return None 3863 3864 expressions = self._parse_csv(self._parse_value) 3865 alias = self._parse_table_alias() 3866 3867 if is_derived: 3868 self._match_r_paren() 3869 3870 return self.expression( 3871 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3872 ) 3873 3874 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3875 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3876 as_modifier and self._match_text_seq("USING", "SAMPLE") 3877 ): 3878 return None 3879 3880 bucket_numerator = None 3881 bucket_denominator = None 3882 bucket_field = None 3883 percent = None 3884 size = None 3885 seed = None 3886 3887 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3888 matched_l_paren = self._match(TokenType.L_PAREN) 3889 3890 if self.TABLESAMPLE_CSV: 3891 num = None 3892 expressions = self._parse_csv(self._parse_primary) 3893 else: 3894 expressions = None 3895 num = ( 3896 self._parse_factor() 3897 if self._match(TokenType.NUMBER, advance=False) 3898 else self._parse_primary() or self._parse_placeholder() 3899 ) 3900 3901 if self._match_text_seq("BUCKET"): 3902 bucket_numerator = self._parse_number() 3903 self._match_text_seq("OUT", "OF") 3904 bucket_denominator = bucket_denominator = self._parse_number() 3905 self._match(TokenType.ON) 3906 bucket_field = self._parse_field() 3907 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3908 percent = num 3909 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3910 size = num 3911 else: 3912 percent = num 3913 3914 if matched_l_paren: 3915 self._match_r_paren() 3916 3917 if self._match(TokenType.L_PAREN): 3918 method = self._parse_var(upper=True) 3919 seed = self._match(TokenType.COMMA) and self._parse_number() 3920 
self._match_r_paren() 3921 elif self._match_texts(("SEED", "REPEATABLE")): 3922 seed = self._parse_wrapped(self._parse_number) 3923 3924 if not method and self.DEFAULT_SAMPLING_METHOD: 3925 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3926 3927 return self.expression( 3928 exp.TableSample, 3929 expressions=expressions, 3930 method=method, 3931 bucket_numerator=bucket_numerator, 3932 bucket_denominator=bucket_denominator, 3933 bucket_field=bucket_field, 3934 percent=percent, 3935 size=size, 3936 seed=seed, 3937 ) 3938 3939 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3940 return list(iter(self._parse_pivot, None)) or None 3941 3942 def _parse_joins(self) -> t.Iterator[exp.Join]: 3943 return iter(self._parse_join, None) 3944 3945 # https://duckdb.org/docs/sql/statements/pivot 3946 def _parse_simplified_pivot(self) -> exp.Pivot: 3947 def _parse_on() -> t.Optional[exp.Expression]: 3948 this = self._parse_bitwise() 3949 return self._parse_in(this) if self._match(TokenType.IN) else this 3950 3951 this = self._parse_table() 3952 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3953 using = self._match(TokenType.USING) and self._parse_csv( 3954 lambda: self._parse_alias(self._parse_function()) 3955 ) 3956 group = self._parse_group() 3957 return self.expression( 3958 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3959 ) 3960 3961 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3962 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3963 this = self._parse_select_or_expression() 3964 3965 self._match(TokenType.ALIAS) 3966 alias = self._parse_bitwise() 3967 if alias: 3968 if isinstance(alias, exp.Column) and not alias.db: 3969 alias = alias.this 3970 return self.expression(exp.PivotAlias, this=this, alias=alias) 3971 3972 return this 3973 3974 value = self._parse_column() 3975 3976 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3977 self.raise_error("Expecting IN (") 3978 3979 if self._match(TokenType.ANY): 3980 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3981 else: 3982 exprs = self._parse_csv(_parse_aliased_expression) 3983 3984 self._match_r_paren() 3985 return self.expression(exp.In, this=value, expressions=exprs) 3986 3987 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3988 index = self._index 3989 include_nulls = None 3990 3991 if self._match(TokenType.PIVOT): 3992 unpivot = False 3993 elif self._match(TokenType.UNPIVOT): 3994 unpivot = True 3995 3996 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3997 if self._match_text_seq("INCLUDE", "NULLS"): 3998 include_nulls = True 3999 elif self._match_text_seq("EXCLUDE", "NULLS"): 4000 include_nulls = False 4001 else: 4002 return None 4003 4004 expressions = [] 4005 4006 if not self._match(TokenType.L_PAREN): 4007 self._retreat(index) 4008 return None 4009 4010 if unpivot: 4011 expressions = self._parse_csv(self._parse_column) 4012 else: 4013 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4014 4015 if not expressions: 4016 self.raise_error("Failed to parse PIVOT's aggregation list") 4017 4018 if not self._match(TokenType.FOR): 4019 self.raise_error("Expecting FOR") 4020 4021 field = self._parse_pivot_in() 4022 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4023 self._parse_bitwise 4024 ) 4025 4026 self._match_r_paren() 4027 4028 pivot = self.expression( 4029 exp.Pivot, 4030 expressions=expressions, 
4031 field=field, 4032 unpivot=unpivot, 4033 include_nulls=include_nulls, 4034 default_on_null=default_on_null, 4035 ) 4036 4037 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4038 pivot.set("alias", self._parse_table_alias()) 4039 4040 if not unpivot: 4041 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4042 4043 columns: t.List[exp.Expression] = [] 4044 for fld in pivot.args["field"].expressions: 4045 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4046 for name in names: 4047 if self.PREFIXED_PIVOT_COLUMNS: 4048 name = f"{name}_{field_name}" if name else field_name 4049 else: 4050 name = f"{field_name}_{name}" if name else field_name 4051 4052 columns.append(exp.to_identifier(name)) 4053 4054 pivot.set("columns", columns) 4055 4056 return pivot 4057 4058 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4059 return [agg.alias for agg in aggregations] 4060 4061 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4062 if not skip_where_token and not self._match(TokenType.PREWHERE): 4063 return None 4064 4065 return self.expression( 4066 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4067 ) 4068 4069 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4070 if not skip_where_token and not self._match(TokenType.WHERE): 4071 return None 4072 4073 return self.expression( 4074 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4075 ) 4076 4077 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4078 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4079 return None 4080 4081 elements: t.Dict[str, t.Any] = defaultdict(list) 4082 4083 if self._match(TokenType.ALL): 4084 elements["all"] = True 4085 elif self._match(TokenType.DISTINCT): 4086 elements["all"] = False 4087 4088 while True: 4089 index = self._index 4090 4091 elements["expressions"].extend( 4092 self._parse_csv( 4093 lambda: None 4094 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4095 else self._parse_assignment() 4096 ) 4097 ) 4098 4099 before_with_index = self._index 4100 with_prefix = self._match(TokenType.WITH) 4101 4102 if self._match(TokenType.ROLLUP): 4103 elements["rollup"].append( 4104 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4105 ) 4106 elif self._match(TokenType.CUBE): 4107 elements["cube"].append( 4108 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4109 ) 4110 elif self._match(TokenType.GROUPING_SETS): 4111 elements["grouping_sets"].append( 4112 self.expression( 4113 exp.GroupingSets, 4114 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4115 ) 4116 ) 4117 elif self._match_text_seq("TOTALS"): 4118 elements["totals"] = True # type: ignore 4119 4120 if before_with_index <= self._index <= before_with_index + 1: 4121 self._retreat(before_with_index) 4122 break 4123 4124 if index == self._index: 4125 break 4126 4127 return self.expression(exp.Group, **elements) # type: ignore 4128 4129 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4130 return self.expression( 4131 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4132 ) 4133 4134 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4135 if self._match(TokenType.L_PAREN): 4136 grouping_set = self._parse_csv(self._parse_column) 4137 
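# e.g. in GROUP BY GROUPING SETS ((a, b), c), the parenthesized (a, b) becomes an exp.Tuple here,
# while the bare column c falls through to _parse_column below.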
self._match_r_paren() 4138 return self.expression(exp.Tuple, expressions=grouping_set) 4139 4140 return self._parse_column() 4141 4142 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4143 if not skip_having_token and not self._match(TokenType.HAVING): 4144 return None 4145 return self.expression(exp.Having, this=self._parse_assignment()) 4146 4147 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4148 if not self._match(TokenType.QUALIFY): 4149 return None 4150 return self.expression(exp.Qualify, this=self._parse_assignment()) 4151 4152 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4153 if skip_start_token: 4154 start = None 4155 elif self._match(TokenType.START_WITH): 4156 start = self._parse_assignment() 4157 else: 4158 return None 4159 4160 self._match(TokenType.CONNECT_BY) 4161 nocycle = self._match_text_seq("NOCYCLE") 4162 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4163 exp.Prior, this=self._parse_bitwise() 4164 ) 4165 connect = self._parse_assignment() 4166 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4167 4168 if not start and self._match(TokenType.START_WITH): 4169 start = self._parse_assignment() 4170 4171 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4172 4173 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4174 this = self._parse_id_var(any_token=True) 4175 if self._match(TokenType.ALIAS): 4176 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4177 return this 4178 4179 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4180 if self._match_text_seq("INTERPOLATE"): 4181 return self._parse_wrapped_csv(self._parse_name_as_expression) 4182 return None 4183 4184 def _parse_order( 4185 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4186 ) -> t.Optional[exp.Expression]: 4187 siblings = None 4188 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4189 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4190 return this 4191 4192 siblings = True 4193 4194 return self.expression( 4195 exp.Order, 4196 this=this, 4197 expressions=self._parse_csv(self._parse_ordered), 4198 siblings=siblings, 4199 ) 4200 4201 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4202 if not self._match(token): 4203 return None 4204 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4205 4206 def _parse_ordered( 4207 self, parse_method: t.Optional[t.Callable] = None 4208 ) -> t.Optional[exp.Ordered]: 4209 this = parse_method() if parse_method else self._parse_assignment() 4210 if not this: 4211 return None 4212 4213 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4214 this = exp.var("ALL") 4215 4216 asc = self._match(TokenType.ASC) 4217 desc = self._match(TokenType.DESC) or (asc and False) 4218 4219 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4220 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4221 4222 nulls_first = is_nulls_first or False 4223 explicitly_null_ordered = is_nulls_first or is_nulls_last 4224 4225 if ( 4226 not explicitly_null_ordered 4227 and ( 4228 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4229 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4230 ) 4231 and self.dialect.NULL_ORDERING != "nulls_are_last" 4232 ): 4233 nulls_first = True 4234 4235 if self._match_text_seq("WITH", "FILL"): 4236 with_fill = self.expression( 
4237 exp.WithFill, 4238 **{ # type: ignore 4239 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4240 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4241 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4242 "interpolate": self._parse_interpolate(), 4243 }, 4244 ) 4245 else: 4246 with_fill = None 4247 4248 return self.expression( 4249 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4250 ) 4251 4252 def _parse_limit( 4253 self, 4254 this: t.Optional[exp.Expression] = None, 4255 top: bool = False, 4256 skip_limit_token: bool = False, 4257 ) -> t.Optional[exp.Expression]: 4258 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4259 comments = self._prev_comments 4260 if top: 4261 limit_paren = self._match(TokenType.L_PAREN) 4262 expression = self._parse_term() if limit_paren else self._parse_number() 4263 4264 if limit_paren: 4265 self._match_r_paren() 4266 else: 4267 expression = self._parse_term() 4268 4269 if self._match(TokenType.COMMA): 4270 offset = expression 4271 expression = self._parse_term() 4272 else: 4273 offset = None 4274 4275 limit_exp = self.expression( 4276 exp.Limit, 4277 this=this, 4278 expression=expression, 4279 offset=offset, 4280 comments=comments, 4281 expressions=self._parse_limit_by(), 4282 ) 4283 4284 return limit_exp 4285 4286 if self._match(TokenType.FETCH): 4287 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4288 direction = self._prev.text.upper() if direction else "FIRST" 4289 4290 count = self._parse_field(tokens=self.FETCH_TOKENS) 4291 percent = self._match(TokenType.PERCENT) 4292 4293 self._match_set((TokenType.ROW, TokenType.ROWS)) 4294 4295 only = self._match_text_seq("ONLY") 4296 with_ties = self._match_text_seq("WITH", "TIES") 4297 4298 if only and with_ties: 4299 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4300 4301 return self.expression( 4302 exp.Fetch, 4303 direction=direction, 4304 count=count, 4305 percent=percent, 4306 with_ties=with_ties, 4307 ) 4308 4309 return this 4310 4311 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4312 if not self._match(TokenType.OFFSET): 4313 return this 4314 4315 count = self._parse_term() 4316 self._match_set((TokenType.ROW, TokenType.ROWS)) 4317 4318 return self.expression( 4319 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4320 ) 4321 4322 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4323 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4324 4325 def _parse_locks(self) -> t.List[exp.Lock]: 4326 locks = [] 4327 while True: 4328 if self._match_text_seq("FOR", "UPDATE"): 4329 update = True 4330 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4331 "LOCK", "IN", "SHARE", "MODE" 4332 ): 4333 update = False 4334 else: 4335 break 4336 4337 expressions = None 4338 if self._match_text_seq("OF"): 4339 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4340 4341 wait: t.Optional[bool | exp.Expression] = None 4342 if self._match_text_seq("NOWAIT"): 4343 wait = True 4344 elif self._match_text_seq("WAIT"): 4345 wait = self._parse_primary() 4346 elif self._match_text_seq("SKIP", "LOCKED"): 4347 wait = False 4348 4349 locks.append( 4350 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4351 ) 4352 4353 return locks 4354 4355 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 4356 while this and self._match_set(self.SET_OPERATIONS): 4357 token_type = self._prev.token_type 4358 4359 if token_type == TokenType.UNION: 4360 operation: t.Type[exp.SetOperation] = exp.Union 4361 elif token_type == TokenType.EXCEPT: 4362 operation = exp.Except 4363 else: 4364 operation = exp.Intersect 4365 4366 comments = self._prev.comments 4367 4368 if self._match(TokenType.DISTINCT): 4369 distinct: t.Optional[bool] = True 4370 elif self._match(TokenType.ALL): 4371 distinct = False 4372 else: 4373 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4374 if distinct is None: 4375 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4376 4377 by_name = self._match_text_seq("BY", "NAME") 4378 expression = self._parse_select(nested=True, parse_set_operation=False) 4379 4380 this = self.expression( 4381 operation, 4382 comments=comments, 4383 this=this, 4384 distinct=distinct, 4385 by_name=by_name, 4386 expression=expression, 4387 ) 4388 4389 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4390 expression = this.expression 4391 4392 if expression: 4393 for arg in self.SET_OP_MODIFIERS: 4394 expr = expression.args.get(arg) 4395 if expr: 4396 this.set(arg, expr.pop()) 4397 4398 return this 4399 4400 def _parse_expression(self) -> t.Optional[exp.Expression]: 4401 return self._parse_alias(self._parse_assignment()) 4402 4403 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4404 this = self._parse_disjunction() 4405 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4406 # This allows us to parse <non-identifier token> := <expr> 4407 this = exp.column( 4408 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4409 ) 4410 4411 while self._match_set(self.ASSIGNMENT): 4412 if isinstance(this, exp.Column) and len(this.parts) == 1: 4413 this = this.this 4414 4415 this = self.expression( 4416 self.ASSIGNMENT[self._prev.token_type], 4417 this=this, 4418 comments=self._prev_comments, 4419 expression=self._parse_assignment(), 4420 ) 4421 4422 return this 4423 4424 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4425 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4426 4427 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4428 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4429 4430 def _parse_equality(self) -> t.Optional[exp.Expression]: 4431 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4432 4433 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4434 return self._parse_tokens(self._parse_range, self.COMPARISON) 4435 4436 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4437 this = this or self._parse_bitwise() 4438 negate = self._match(TokenType.NOT) 4439 4440 if self._match_set(self.RANGE_PARSERS): 4441 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4442 if not expression: 4443 return this 4444 4445 this = expression 4446 elif self._match(TokenType.ISNULL): 4447 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4448 4449 # Postgres supports ISNULL and NOTNULL for conditions. 
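# e.g. x NOTNULL is parsed below as NOT (x IS NULL), mirroring x IS NOT NULL.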
4450 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4451 if self._match(TokenType.NOTNULL): 4452 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4453 this = self.expression(exp.Not, this=this) 4454 4455 if negate: 4456 this = self._negate_range(this) 4457 4458 if self._match(TokenType.IS): 4459 this = self._parse_is(this) 4460 4461 return this 4462 4463 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4464 if not this: 4465 return this 4466 4467 return self.expression(exp.Not, this=this) 4468 4469 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4470 index = self._index - 1 4471 negate = self._match(TokenType.NOT) 4472 4473 if self._match_text_seq("DISTINCT", "FROM"): 4474 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4475 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4476 4477 if self._match(TokenType.JSON): 4478 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4479 4480 if self._match_text_seq("WITH"): 4481 _with = True 4482 elif self._match_text_seq("WITHOUT"): 4483 _with = False 4484 else: 4485 _with = None 4486 4487 unique = self._match(TokenType.UNIQUE) 4488 self._match_text_seq("KEYS") 4489 expression: t.Optional[exp.Expression] = self.expression( 4490 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4491 ) 4492 else: 4493 expression = self._parse_primary() or self._parse_null() 4494 if not expression: 4495 self._retreat(index) 4496 return None 4497 4498 this = self.expression(exp.Is, this=this, expression=expression) 4499 return self.expression(exp.Not, this=this) if negate else this 4500 4501 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4502 unnest = self._parse_unnest(with_alias=False) 4503 if unnest: 4504 this = self.expression(exp.In, this=this, unnest=unnest) 4505 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4506 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4507 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4508 4509 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4510 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4511 else: 4512 this = self.expression(exp.In, this=this, expressions=expressions) 4513 4514 if matched_l_paren: 4515 self._match_r_paren(this) 4516 elif not self._match(TokenType.R_BRACKET, expression=this): 4517 self.raise_error("Expecting ]") 4518 else: 4519 this = self.expression(exp.In, this=this, field=self._parse_column()) 4520 4521 return this 4522 4523 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4524 low = self._parse_bitwise() 4525 self._match(TokenType.AND) 4526 high = self._parse_bitwise() 4527 return self.expression(exp.Between, this=this, low=low, high=high) 4528 4529 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4530 if not self._match(TokenType.ESCAPE): 4531 return this 4532 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4533 4534 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4535 index = self._index 4536 4537 if not self._match(TokenType.INTERVAL) and match_interval: 4538 return None 4539 4540 if self._match(TokenType.STRING, advance=False): 4541 this = self._parse_primary() 4542 else: 4543 this = self._parse_term() 4544 4545 if not 
this or ( 4546 isinstance(this, exp.Column) 4547 and not this.table 4548 and not this.this.quoted 4549 and this.name.upper() == "IS" 4550 ): 4551 self._retreat(index) 4552 return None 4553 4554 unit = self._parse_function() or ( 4555 not self._match(TokenType.ALIAS, advance=False) 4556 and self._parse_var(any_token=True, upper=True) 4557 ) 4558 4559 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4560 # each INTERVAL expression into this canonical form so it's easy to transpile 4561 if this and this.is_number: 4562 this = exp.Literal.string(this.to_py()) 4563 elif this and this.is_string: 4564 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4565 if len(parts) == 1: 4566 if unit: 4567 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4568 self._retreat(self._index - 1) 4569 4570 this = exp.Literal.string(parts[0][0]) 4571 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4572 4573 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4574 unit = self.expression( 4575 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4576 ) 4577 4578 interval = self.expression(exp.Interval, this=this, unit=unit) 4579 4580 index = self._index 4581 self._match(TokenType.PLUS) 4582 4583 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4584 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4585 return self.expression( 4586 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4587 ) 4588 4589 self._retreat(index) 4590 return interval 4591 4592 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4593 this = self._parse_term() 4594 4595 while True: 4596 if self._match_set(self.BITWISE): 4597 this = self.expression( 4598 self.BITWISE[self._prev.token_type], 4599 this=this, 4600 expression=self._parse_term(), 4601 ) 4602 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4603 this = self.expression( 4604 exp.DPipe, 4605 this=this, 4606 expression=self._parse_term(), 4607 safe=not self.dialect.STRICT_STRING_CONCAT, 4608 ) 4609 elif self._match(TokenType.DQMARK): 4610 this = self.expression( 4611 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4612 ) 4613 elif self._match_pair(TokenType.LT, TokenType.LT): 4614 this = self.expression( 4615 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4616 ) 4617 elif self._match_pair(TokenType.GT, TokenType.GT): 4618 this = self.expression( 4619 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4620 ) 4621 else: 4622 break 4623 4624 return this 4625 4626 def _parse_term(self) -> t.Optional[exp.Expression]: 4627 this = self._parse_factor() 4628 4629 while self._match_set(self.TERM): 4630 klass = self.TERM[self._prev.token_type] 4631 comments = self._prev_comments 4632 expression = self._parse_factor() 4633 4634 this = self.expression(klass, this=this, comments=comments, expression=expression) 4635 4636 if isinstance(this, exp.Collate): 4637 expr = this.expression 4638 4639 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4640 # fallback to Identifier / Var 4641 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4642 ident = expr.this 4643 if isinstance(ident, exp.Identifier): 4644 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4645 4646 return this 4647 4648 def _parse_factor(self) -> t.Optional[exp.Expression]: 4649 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary 4650 this = parse_method() 4651 4652 while self._match_set(self.FACTOR): 4653 klass = self.FACTOR[self._prev.token_type] 4654 comments = self._prev_comments 4655 expression = parse_method() 4656 4657 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4658 self._retreat(self._index - 1) 4659 return this 4660 4661 this = self.expression(klass, this=this, comments=comments, expression=expression) 4662 4663 if isinstance(this, exp.Div): 4664 this.args["typed"] = self.dialect.TYPED_DIVISION 4665 this.args["safe"] = self.dialect.SAFE_DIVISION 4666 4667 return this 4668 4669 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4670 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4671 4672 def _parse_unary(self) -> t.Optional[exp.Expression]: 4673 if self._match_set(self.UNARY_PARSERS): 4674 return self.UNARY_PARSERS[self._prev.token_type](self) 4675 return self._parse_at_time_zone(self._parse_type()) 4676 4677 def _parse_type( 4678 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4679 ) -> t.Optional[exp.Expression]: 4680 interval = parse_interval and self._parse_interval() 4681 if interval: 4682 return interval 4683 4684 index = self._index 4685 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4686 4687 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4688 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4689 if isinstance(data_type, exp.Cast): 4690 # This constructor can contain ops directly after it, for instance struct unnesting: 4691 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4692 return self._parse_column_ops(data_type) 4693 4694 if data_type: 4695 index2 = self._index 4696 this = self._parse_primary() 4697 4698 if isinstance(this, exp.Literal): 4699 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4700 if parser: 4701 return parser(self, this, data_type) 4702 4703 return self.expression(exp.Cast, this=this, to=data_type) 4704 4705 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4706 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4707 # 4708 # If the index difference here is greater than 1, that means the parser itself must have 4709 # consumed additional tokens such as the DECIMAL precision and scale in the above example. 4710 # 4711 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4712 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4713 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4714 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4715 # 4716 # In these cases, we don't really want to return the converted type, but instead retreat 4717 # and try to parse a Column or Identifier in the section below.
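# Illustration: for DECIMAL(38, 0) in the input, the type parser also consumed ( 38 , 0 ), so
# index2 - index > 1 and the parsed DataType is kept; for a bare DECIMAL that a TYPE_CONVERTER
# expanded, index2 - index == 1, so we fall through, retreat and try a column or identifier.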
4718 if data_type.expressions and index2 - index > 1: 4719 self._retreat(index2) 4720 return self._parse_column_ops(data_type) 4721 4722 self._retreat(index) 4723 4724 if fallback_to_identifier: 4725 return self._parse_id_var() 4726 4727 this = self._parse_column() 4728 return this and self._parse_column_ops(this) 4729 4730 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4731 this = self._parse_type() 4732 if not this: 4733 return None 4734 4735 if isinstance(this, exp.Column) and not this.table: 4736 this = exp.var(this.name.upper()) 4737 4738 return self.expression( 4739 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4740 ) 4741 4742 def _parse_types( 4743 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4744 ) -> t.Optional[exp.Expression]: 4745 index = self._index 4746 4747 this: t.Optional[exp.Expression] = None 4748 prefix = self._match_text_seq("SYSUDTLIB", ".") 4749 4750 if not self._match_set(self.TYPE_TOKENS): 4751 identifier = allow_identifiers and self._parse_id_var( 4752 any_token=False, tokens=(TokenType.VAR,) 4753 ) 4754 if isinstance(identifier, exp.Identifier): 4755 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4756 4757 if len(tokens) != 1: 4758 self.raise_error("Unexpected identifier", self._prev) 4759 4760 if tokens[0].token_type in self.TYPE_TOKENS: 4761 self._prev = tokens[0] 4762 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4763 type_name = identifier.name 4764 4765 while self._match(TokenType.DOT): 4766 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4767 4768 this = exp.DataType.build(type_name, udt=True) 4769 else: 4770 self._retreat(self._index - 1) 4771 return None 4772 else: 4773 return None 4774 4775 type_token = self._prev.token_type 4776 4777 if type_token == TokenType.PSEUDO_TYPE: 4778 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4779 4780 if type_token == TokenType.OBJECT_IDENTIFIER: 4781 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4782 4783 # https://materialize.com/docs/sql/types/map/ 4784 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4785 key_type = self._parse_types( 4786 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4787 ) 4788 if not self._match(TokenType.FARROW): 4789 self._retreat(index) 4790 return None 4791 4792 value_type = self._parse_types( 4793 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4794 ) 4795 if not self._match(TokenType.R_BRACKET): 4796 self._retreat(index) 4797 return None 4798 4799 return exp.DataType( 4800 this=exp.DataType.Type.MAP, 4801 expressions=[key_type, value_type], 4802 nested=True, 4803 prefix=prefix, 4804 ) 4805 4806 nested = type_token in self.NESTED_TYPE_TOKENS 4807 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4808 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4809 expressions = None 4810 maybe_func = False 4811 4812 if self._match(TokenType.L_PAREN): 4813 if is_struct: 4814 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4815 elif nested: 4816 expressions = self._parse_csv( 4817 lambda: self._parse_types( 4818 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4819 ) 4820 ) 4821 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4822 this = expressions[0] 4823 this.set("nullable", True) 4824 self._match_r_paren() 4825 return this 4826 elif type_token in self.ENUM_TYPE_TOKENS: 4827 
expressions = self._parse_csv(self._parse_equality) 4828 elif is_aggregate: 4829 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4830 any_token=False, tokens=(TokenType.VAR,) 4831 ) 4832 if not func_or_ident or not self._match(TokenType.COMMA): 4833 return None 4834 expressions = self._parse_csv( 4835 lambda: self._parse_types( 4836 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4837 ) 4838 ) 4839 expressions.insert(0, func_or_ident) 4840 else: 4841 expressions = self._parse_csv(self._parse_type_size) 4842 4843 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4844 if type_token == TokenType.VECTOR and len(expressions) == 2: 4845 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4846 4847 if not expressions or not self._match(TokenType.R_PAREN): 4848 self._retreat(index) 4849 return None 4850 4851 maybe_func = True 4852 4853 values: t.Optional[t.List[exp.Expression]] = None 4854 4855 if nested and self._match(TokenType.LT): 4856 if is_struct: 4857 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4858 else: 4859 expressions = self._parse_csv( 4860 lambda: self._parse_types( 4861 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4862 ) 4863 ) 4864 4865 if not self._match(TokenType.GT): 4866 self.raise_error("Expecting >") 4867 4868 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4869 values = self._parse_csv(self._parse_assignment) 4870 if not values and is_struct: 4871 values = None 4872 self._retreat(self._index - 1) 4873 else: 4874 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4875 4876 if type_token in self.TIMESTAMPS: 4877 if self._match_text_seq("WITH", "TIME", "ZONE"): 4878 maybe_func = False 4879 tz_type = ( 4880 exp.DataType.Type.TIMETZ 4881 if type_token in self.TIMES 4882 else exp.DataType.Type.TIMESTAMPTZ 4883 ) 4884 this = exp.DataType(this=tz_type, expressions=expressions) 4885 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4886 maybe_func = False 4887 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4888 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4889 maybe_func = False 4890 elif type_token == TokenType.INTERVAL: 4891 unit = self._parse_var(upper=True) 4892 if unit: 4893 if self._match_text_seq("TO"): 4894 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4895 4896 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4897 else: 4898 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4899 4900 if maybe_func and check_func: 4901 index2 = self._index 4902 peek = self._parse_string() 4903 4904 if not peek: 4905 self._retreat(index) 4906 return None 4907 4908 self._retreat(index2) 4909 4910 if not this: 4911 if self._match_text_seq("UNSIGNED"): 4912 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4913 if not unsigned_type_token: 4914 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4915 4916 type_token = unsigned_type_token or type_token 4917 4918 this = exp.DataType( 4919 this=exp.DataType.Type[type_token.value], 4920 expressions=expressions, 4921 nested=nested, 4922 prefix=prefix, 4923 ) 4924 4925 # Empty arrays/structs are allowed 4926 if values is not None: 4927 cls = exp.Struct if is_struct else exp.Array 4928 this = exp.cast(cls(expressions=values), this, copy=False) 4929 4930 elif expressions: 4931 this.set("expressions", 
expressions) 4932 4933 # https://materialize.com/docs/sql/types/list/#type-name 4934 while self._match(TokenType.LIST): 4935 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4936 4937 index = self._index 4938 4939 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4940 matched_array = self._match(TokenType.ARRAY) 4941 4942 while self._curr: 4943 datatype_token = self._prev.token_type 4944 matched_l_bracket = self._match(TokenType.L_BRACKET) 4945 if not matched_l_bracket and not matched_array: 4946 break 4947 4948 matched_array = False 4949 values = self._parse_csv(self._parse_assignment) or None 4950 if ( 4951 values 4952 and not schema 4953 and ( 4954 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4955 ) 4956 ): 4957 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4958 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4959 self._retreat(index) 4960 break 4961 4962 this = exp.DataType( 4963 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4964 ) 4965 self._match(TokenType.R_BRACKET) 4966 4967 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4968 converter = self.TYPE_CONVERTERS.get(this.this) 4969 if converter: 4970 this = converter(t.cast(exp.DataType, this)) 4971 4972 return this 4973 4974 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4975 index = self._index 4976 4977 if ( 4978 self._curr 4979 and self._next 4980 and self._curr.token_type in self.TYPE_TOKENS 4981 and self._next.token_type in self.TYPE_TOKENS 4982 ): 4983 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4984 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4985 this = self._parse_id_var() 4986 else: 4987 this = ( 4988 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4989 or self._parse_id_var() 4990 ) 4991 4992 self._match(TokenType.COLON) 4993 4994 if ( 4995 type_required 4996 and not isinstance(this, exp.DataType) 4997 and not self._match_set(self.TYPE_TOKENS, advance=False) 4998 ): 4999 self._retreat(index) 5000 return self._parse_types() 5001 5002 return self._parse_column_def(this) 5003 5004 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5005 if not self._match_text_seq("AT", "TIME", "ZONE"): 5006 return this 5007 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5008 5009 def _parse_column(self) -> t.Optional[exp.Expression]: 5010 this = self._parse_column_reference() 5011 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5012 5013 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5014 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5015 5016 return column 5017 5018 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5019 this = self._parse_field() 5020 if ( 5021 not this 5022 and self._match(TokenType.VALUES, advance=False) 5023 and self.VALUES_FOLLOWED_BY_PAREN 5024 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5025 ): 5026 this = self._parse_id_var() 5027 5028 if isinstance(this, exp.Identifier): 5029 # We bubble up comments from the Identifier to the Column 5030 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5031 5032 return this 5033 5034 def _parse_colon_as_variant_extract( 5035 self, this: t.Optional[exp.Expression] 5036 ) -> t.Optional[exp.Expression]: 5037 casts = [] 5038 json_path = [] 5039 escape = None 5040 5041 while self._match(TokenType.COLON): 5042 start_index = self._index 5043 5044 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5045 path = self._parse_column_ops( 5046 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5047 ) 5048 5049 # The cast :: operator has a lower precedence than the extraction operator :, so 5050 # we rearrange the AST appropriately to avoid casting the JSON path 5051 while isinstance(path, exp.Cast): 5052 casts.append(path.to) 5053 path = path.this 5054 5055 if casts: 5056 dcolon_offset = next( 5057 i 5058 for i, t in enumerate(self._tokens[start_index:]) 5059 if t.token_type == TokenType.DCOLON 5060 ) 5061 end_token = self._tokens[start_index + dcolon_offset - 1] 5062 else: 5063 end_token = self._prev 5064 5065 if path: 5066 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5067 # it'll roundtrip to a string literal in GET_PATH 5068 if isinstance(path, exp.Identifier) and path.quoted: 5069 escape = True 5070 5071 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5072 5073 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5074 # Databricks transforms it back to the colon/dot notation 5075 if json_path: 5076 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5077 5078 if json_path_expr: 5079 json_path_expr.set("escape", escape) 5080 5081 this = self.expression( 5082 exp.JSONExtract, 5083 this=this, 5084 expression=json_path_expr, 5085 variant_extract=True, 5086 ) 5087 5088 while casts: 5089 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5090 5091 return this 5092 5093 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5094 return self._parse_types() 5095 5096 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5097 this = self._parse_bracket(this) 5098 5099 while self._match_set(self.COLUMN_OPERATORS): 5100 op_token = self._prev.token_type 5101 op = self.COLUMN_OPERATORS.get(op_token) 5102 5103 if op_token == TokenType.DCOLON: 5104 field = self._parse_dcolon() 5105 if not field: 5106 self.raise_error("Expected type") 5107 elif op and self._curr: 5108 field = self._parse_column_reference() or self._parse_bracket() 5109 else: 5110 field = self._parse_field(any_token=True, anonymous_func=True) 5111 5112 if isinstance(field, exp.Func) and this: 5113 # bigquery allows function calls like x.y.count(...) 5114 # SAFE.SUBSTR(...) 5115 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5116 this = exp.replace_tree( 5117 this, 5118 lambda n: ( 5119 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5120 if n.table 5121 else n.this 5122 ) 5123 if isinstance(n, exp.Column) 5124 else n, 5125 ) 5126 5127 if op: 5128 this = op(self, this, field) 5129 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5130 this = self.expression( 5131 exp.Column, 5132 comments=this.comments, 5133 this=field, 5134 table=this.this, 5135 db=this.args.get("table"), 5136 catalog=this.args.get("db"), 5137 ) 5138 else: 5139 this = self.expression(exp.Dot, this=this, expression=field) 5140 5141 this = self._parse_bracket(this) 5142 5143 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5144 5145 def _parse_primary(self) -> t.Optional[exp.Expression]: 5146 if self._match_set(self.PRIMARY_PARSERS): 5147 token_type = self._prev.token_type 5148 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5149 5150 if token_type == TokenType.STRING: 5151 expressions = [primary] 5152 while self._match(TokenType.STRING): 5153 expressions.append(exp.Literal.string(self._prev.text)) 5154 5155 if len(expressions) > 1: 5156 return self.expression(exp.Concat, expressions=expressions) 5157 5158 return primary 5159 5160 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5161 return exp.Literal.number(f"0.{self._prev.text}") 5162 5163 if self._match(TokenType.L_PAREN): 5164 comments = self._prev_comments 5165 query = self._parse_select() 5166 5167 if query: 5168 expressions = [query] 5169 else: 5170 expressions = self._parse_expressions() 5171 5172 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5173 5174 if not this and self._match(TokenType.R_PAREN, advance=False): 5175 this 
= self.expression(exp.Tuple) 5176 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5177 this = self._parse_subquery(this=this, parse_alias=False) 5178 elif isinstance(this, exp.Subquery): 5179 this = self._parse_subquery( 5180 this=self._parse_set_operations(this), parse_alias=False 5181 ) 5182 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5183 this = self.expression(exp.Tuple, expressions=expressions) 5184 else: 5185 this = self.expression(exp.Paren, this=this) 5186 5187 if this: 5188 this.add_comments(comments) 5189 5190 self._match_r_paren(expression=this) 5191 return this 5192 5193 return None 5194 5195 def _parse_field( 5196 self, 5197 any_token: bool = False, 5198 tokens: t.Optional[t.Collection[TokenType]] = None, 5199 anonymous_func: bool = False, 5200 ) -> t.Optional[exp.Expression]: 5201 if anonymous_func: 5202 field = ( 5203 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5204 or self._parse_primary() 5205 ) 5206 else: 5207 field = self._parse_primary() or self._parse_function( 5208 anonymous=anonymous_func, any_token=any_token 5209 ) 5210 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5211 5212 def _parse_function( 5213 self, 5214 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5215 anonymous: bool = False, 5216 optional_parens: bool = True, 5217 any_token: bool = False, 5218 ) -> t.Optional[exp.Expression]: 5219 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5220 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5221 fn_syntax = False 5222 if ( 5223 self._match(TokenType.L_BRACE, advance=False) 5224 and self._next 5225 and self._next.text.upper() == "FN" 5226 ): 5227 self._advance(2) 5228 fn_syntax = True 5229 5230 func = self._parse_function_call( 5231 functions=functions, 5232 anonymous=anonymous, 5233 optional_parens=optional_parens, 5234 any_token=any_token, 5235 ) 5236 5237 if fn_syntax: 5238 self._match(TokenType.R_BRACE) 5239 5240 return func 5241 5242 def _parse_function_call( 5243 self, 5244 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5245 anonymous: bool = False, 5246 optional_parens: bool = True, 5247 any_token: bool = False, 5248 ) -> t.Optional[exp.Expression]: 5249 if not self._curr: 5250 return None 5251 5252 comments = self._curr.comments 5253 token_type = self._curr.token_type 5254 this = self._curr.text 5255 upper = this.upper() 5256 5257 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5258 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5259 self._advance() 5260 return self._parse_window(parser(self)) 5261 5262 if not self._next or self._next.token_type != TokenType.L_PAREN: 5263 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5264 self._advance() 5265 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5266 5267 return None 5268 5269 if any_token: 5270 if token_type in self.RESERVED_TOKENS: 5271 return None 5272 elif token_type not in self.FUNC_TOKENS: 5273 return None 5274 5275 self._advance(2) 5276 5277 parser = self.FUNCTION_PARSERS.get(upper) 5278 if parser and not anonymous: 5279 this = parser(self) 5280 else: 5281 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5282 5283 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5284 this = self.expression( 5285 subquery_predicate, comments=comments, this=self._parse_select() 5286 ) 5287 self._match_r_paren() 5288 return this 5289 5290 if functions is None: 
5291 functions = self.FUNCTIONS 5292 5293 function = functions.get(upper) 5294 5295 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5296 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5297 5298 if alias: 5299 args = self._kv_to_prop_eq(args) 5300 5301 if function and not anonymous: 5302 if "dialect" in function.__code__.co_varnames: 5303 func = function(args, dialect=self.dialect) 5304 else: 5305 func = function(args) 5306 5307 func = self.validate_expression(func, args) 5308 if not self.dialect.NORMALIZE_FUNCTIONS: 5309 func.meta["name"] = this 5310 5311 this = func 5312 else: 5313 if token_type == TokenType.IDENTIFIER: 5314 this = exp.Identifier(this=this, quoted=True) 5315 this = self.expression(exp.Anonymous, this=this, expressions=args) 5316 5317 if isinstance(this, exp.Expression): 5318 this.add_comments(comments) 5319 5320 self._match_r_paren(this) 5321 return self._parse_window(this) 5322 5323 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5324 return expression 5325 5326 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5327 transformed = [] 5328 5329 for index, e in enumerate(expressions): 5330 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5331 if isinstance(e, exp.Alias): 5332 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5333 5334 if not isinstance(e, exp.PropertyEQ): 5335 e = self.expression( 5336 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5337 ) 5338 5339 if isinstance(e.this, exp.Column): 5340 e.this.replace(e.this.this) 5341 else: 5342 e = self._to_prop_eq(e, index) 5343 5344 transformed.append(e) 5345 5346 return transformed 5347 5348 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5349 return self._parse_statement() 5350 5351 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5352 return self._parse_column_def(self._parse_id_var()) 5353 5354 def _parse_user_defined_function( 5355 self, kind: t.Optional[TokenType] = None 5356 ) -> t.Optional[exp.Expression]: 5357 this = self._parse_id_var() 5358 5359 while self._match(TokenType.DOT): 5360 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5361 5362 if not self._match(TokenType.L_PAREN): 5363 return this 5364 5365 expressions = self._parse_csv(self._parse_function_parameter) 5366 self._match_r_paren() 5367 return self.expression( 5368 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5369 ) 5370 5371 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5372 literal = self._parse_primary() 5373 if literal: 5374 return self.expression(exp.Introducer, this=token.text, expression=literal) 5375 5376 return self.expression(exp.Identifier, this=token.text) 5377 5378 def _parse_session_parameter(self) -> exp.SessionParameter: 5379 kind = None 5380 this = self._parse_id_var() or self._parse_primary() 5381 5382 if this and self._match(TokenType.DOT): 5383 kind = this.name 5384 this = self._parse_var() or self._parse_primary() 5385 5386 return self.expression(exp.SessionParameter, this=this, kind=kind) 5387 5388 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5389 return self._parse_id_var() 5390 5391 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5392 index = self._index 5393 5394 if self._match(TokenType.L_PAREN): 5395 expressions = t.cast( 5396 t.List[t.Optional[exp.Expression]], 
self._parse_csv(self._parse_lambda_arg) 5397 ) 5398 5399 if not self._match(TokenType.R_PAREN): 5400 self._retreat(index) 5401 else: 5402 expressions = [self._parse_lambda_arg()] 5403 5404 if self._match_set(self.LAMBDAS): 5405 return self.LAMBDAS[self._prev.token_type](self, expressions) 5406 5407 self._retreat(index) 5408 5409 this: t.Optional[exp.Expression] 5410 5411 if self._match(TokenType.DISTINCT): 5412 this = self.expression( 5413 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5414 ) 5415 else: 5416 this = self._parse_select_or_expression(alias=alias) 5417 5418 return self._parse_limit( 5419 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5420 ) 5421 5422 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5423 index = self._index 5424 if not self._match(TokenType.L_PAREN): 5425 return this 5426 5427 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5428 # expr can be of both types 5429 if self._match_set(self.SELECT_START_TOKENS): 5430 self._retreat(index) 5431 return this 5432 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5433 self._match_r_paren() 5434 return self.expression(exp.Schema, this=this, expressions=args) 5435 5436 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5437 return self._parse_column_def(self._parse_field(any_token=True)) 5438 5439 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5440 # column defs are not really columns, they're identifiers 5441 if isinstance(this, exp.Column): 5442 this = this.this 5443 5444 kind = self._parse_types(schema=True) 5445 5446 if self._match_text_seq("FOR", "ORDINALITY"): 5447 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5448 5449 constraints: t.List[exp.Expression] = [] 5450 5451 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5452 ("ALIAS", "MATERIALIZED") 5453 ): 5454 persisted = self._prev.text.upper() == "MATERIALIZED" 5455 constraint_kind = exp.ComputedColumnConstraint( 5456 this=self._parse_assignment(), 5457 persisted=persisted or self._match_text_seq("PERSISTED"), 5458 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5459 ) 5460 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5461 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5462 self._match(TokenType.ALIAS) 5463 constraints.append( 5464 self.expression( 5465 exp.ColumnConstraint, 5466 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5467 ) 5468 ) 5469 5470 while True: 5471 constraint = self._parse_column_constraint() 5472 if not constraint: 5473 break 5474 constraints.append(constraint) 5475 5476 if not kind and not constraints: 5477 return this 5478 5479 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5480 5481 def _parse_auto_increment( 5482 self, 5483 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5484 start = None 5485 increment = None 5486 5487 if self._match(TokenType.L_PAREN, advance=False): 5488 args = self._parse_wrapped_csv(self._parse_bitwise) 5489 start = seq_get(args, 0) 5490 increment = seq_get(args, 1) 5491 elif self._match_text_seq("START"): 5492 start = self._parse_bitwise() 5493 self._match_text_seq("INCREMENT") 5494 increment = self._parse_bitwise() 5495 5496 if start and increment: 5497 return 
exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5498 5499 return exp.AutoIncrementColumnConstraint() 5500 5501 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5502 if not self._match_text_seq("REFRESH"): 5503 self._retreat(self._index - 1) 5504 return None 5505 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5506 5507 def _parse_compress(self) -> exp.CompressColumnConstraint: 5508 if self._match(TokenType.L_PAREN, advance=False): 5509 return self.expression( 5510 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5511 ) 5512 5513 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5514 5515 def _parse_generated_as_identity( 5516 self, 5517 ) -> ( 5518 exp.GeneratedAsIdentityColumnConstraint 5519 | exp.ComputedColumnConstraint 5520 | exp.GeneratedAsRowColumnConstraint 5521 ): 5522 if self._match_text_seq("BY", "DEFAULT"): 5523 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5524 this = self.expression( 5525 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5526 ) 5527 else: 5528 self._match_text_seq("ALWAYS") 5529 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5530 5531 self._match(TokenType.ALIAS) 5532 5533 if self._match_text_seq("ROW"): 5534 start = self._match_text_seq("START") 5535 if not start: 5536 self._match(TokenType.END) 5537 hidden = self._match_text_seq("HIDDEN") 5538 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5539 5540 identity = self._match_text_seq("IDENTITY") 5541 5542 if self._match(TokenType.L_PAREN): 5543 if self._match(TokenType.START_WITH): 5544 this.set("start", self._parse_bitwise()) 5545 if self._match_text_seq("INCREMENT", "BY"): 5546 this.set("increment", self._parse_bitwise()) 5547 if self._match_text_seq("MINVALUE"): 5548 this.set("minvalue", self._parse_bitwise()) 5549 if self._match_text_seq("MAXVALUE"): 5550 this.set("maxvalue", self._parse_bitwise()) 5551 5552 if self._match_text_seq("CYCLE"): 5553 this.set("cycle", True) 5554 elif self._match_text_seq("NO", "CYCLE"): 5555 this.set("cycle", False) 5556 5557 if not identity: 5558 this.set("expression", self._parse_range()) 5559 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5560 args = self._parse_csv(self._parse_bitwise) 5561 this.set("start", seq_get(args, 0)) 5562 this.set("increment", seq_get(args, 1)) 5563 5564 self._match_r_paren() 5565 5566 return this 5567 5568 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5569 self._match_text_seq("LENGTH") 5570 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5571 5572 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5573 if self._match_text_seq("NULL"): 5574 return self.expression(exp.NotNullColumnConstraint) 5575 if self._match_text_seq("CASESPECIFIC"): 5576 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5577 if self._match_text_seq("FOR", "REPLICATION"): 5578 return self.expression(exp.NotForReplicationColumnConstraint) 5579 5580 # Unconsume the `NOT` token 5581 self._retreat(self._index - 1) 5582 return None 5583 5584 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5585 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5586 5587 procedure_option_follows = ( 5588 self._match(TokenType.WITH, advance=False) 5589 and self._next 5590 and self._next.text.upper() in 
self.PROCEDURE_OPTIONS 5591 ) 5592 5593 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5594 return self.expression( 5595 exp.ColumnConstraint, 5596 this=this, 5597 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5598 ) 5599 5600 return this 5601 5602 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5603 if not self._match(TokenType.CONSTRAINT): 5604 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5605 5606 return self.expression( 5607 exp.Constraint, 5608 this=self._parse_id_var(), 5609 expressions=self._parse_unnamed_constraints(), 5610 ) 5611 5612 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5613 constraints = [] 5614 while True: 5615 constraint = self._parse_unnamed_constraint() or self._parse_function() 5616 if not constraint: 5617 break 5618 constraints.append(constraint) 5619 5620 return constraints 5621 5622 def _parse_unnamed_constraint( 5623 self, constraints: t.Optional[t.Collection[str]] = None 5624 ) -> t.Optional[exp.Expression]: 5625 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5626 constraints or self.CONSTRAINT_PARSERS 5627 ): 5628 return None 5629 5630 constraint = self._prev.text.upper() 5631 if constraint not in self.CONSTRAINT_PARSERS: 5632 self.raise_error(f"No parser found for schema constraint {constraint}.") 5633 5634 return self.CONSTRAINT_PARSERS[constraint](self) 5635 5636 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5637 return self._parse_id_var(any_token=False) 5638 5639 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5640 self._match_text_seq("KEY") 5641 return self.expression( 5642 exp.UniqueColumnConstraint, 5643 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5644 this=self._parse_schema(self._parse_unique_key()), 5645 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5646 on_conflict=self._parse_on_conflict(), 5647 ) 5648 5649 def _parse_key_constraint_options(self) -> t.List[str]: 5650 options = [] 5651 while True: 5652 if not self._curr: 5653 break 5654 5655 if self._match(TokenType.ON): 5656 action = None 5657 on = self._advance_any() and self._prev.text 5658 5659 if self._match_text_seq("NO", "ACTION"): 5660 action = "NO ACTION" 5661 elif self._match_text_seq("CASCADE"): 5662 action = "CASCADE" 5663 elif self._match_text_seq("RESTRICT"): 5664 action = "RESTRICT" 5665 elif self._match_pair(TokenType.SET, TokenType.NULL): 5666 action = "SET NULL" 5667 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5668 action = "SET DEFAULT" 5669 else: 5670 self.raise_error("Invalid key constraint") 5671 5672 options.append(f"ON {on} {action}") 5673 else: 5674 var = self._parse_var_from_options( 5675 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5676 ) 5677 if not var: 5678 break 5679 options.append(var.name) 5680 5681 return options 5682 5683 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5684 if match and not self._match(TokenType.REFERENCES): 5685 return None 5686 5687 expressions = None 5688 this = self._parse_table(schema=True) 5689 options = self._parse_key_constraint_options() 5690 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5691 5692 def _parse_foreign_key(self) -> exp.ForeignKey: 5693 expressions = self._parse_wrapped_id_vars() 5694 reference = self._parse_references() 5695 options = {} 5696 5697 while self._match(TokenType.ON): 5698 if not 
self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5699 self.raise_error("Expected DELETE or UPDATE") 5700 5701 kind = self._prev.text.lower() 5702 5703 if self._match_text_seq("NO", "ACTION"): 5704 action = "NO ACTION" 5705 elif self._match(TokenType.SET): 5706 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5707 action = "SET " + self._prev.text.upper() 5708 else: 5709 self._advance() 5710 action = self._prev.text.upper() 5711 5712 options[kind] = action 5713 5714 return self.expression( 5715 exp.ForeignKey, 5716 expressions=expressions, 5717 reference=reference, 5718 **options, # type: ignore 5719 ) 5720 5721 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5722 return self._parse_field() 5723 5724 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5725 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5726 self._retreat(self._index - 1) 5727 return None 5728 5729 id_vars = self._parse_wrapped_id_vars() 5730 return self.expression( 5731 exp.PeriodForSystemTimeConstraint, 5732 this=seq_get(id_vars, 0), 5733 expression=seq_get(id_vars, 1), 5734 ) 5735 5736 def _parse_primary_key( 5737 self, wrapped_optional: bool = False, in_props: bool = False 5738 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5739 desc = ( 5740 self._match_set((TokenType.ASC, TokenType.DESC)) 5741 and self._prev.token_type == TokenType.DESC 5742 ) 5743 5744 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5745 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5746 5747 expressions = self._parse_wrapped_csv( 5748 self._parse_primary_key_part, optional=wrapped_optional 5749 ) 5750 options = self._parse_key_constraint_options() 5751 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5752 5753 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5754 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5755 5756 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5757 """ 5758 Parses a datetime column in ODBC format. We parse the column into the corresponding 5759 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5760 same as we did for `DATE('yyyy-mm-dd')`. 
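        For example (a behavioral sketch; the `d`, `t` and `ts` keys are assumed
        to map to `exp.Date`, `exp.Time` and `exp.Timestamp` in
        ODBC_DATETIME_LITERALS), `{d '2024-01-01'}` yields the same tree as
        `DATE('2024-01-01')`, and `{t '13:45:00'}` the same tree as
        `TIME('13:45:00')`.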
5761 5762 Reference: 5763 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5764 """ 5765 self._match(TokenType.VAR) 5766 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5767 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5768 if not self._match(TokenType.R_BRACE): 5769 self.raise_error("Expected }") 5770 return expression 5771 5772 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5773 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5774 return this 5775 5776 bracket_kind = self._prev.token_type 5777 if ( 5778 bracket_kind == TokenType.L_BRACE 5779 and self._curr 5780 and self._curr.token_type == TokenType.VAR 5781 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5782 ): 5783 return self._parse_odbc_datetime_literal() 5784 5785 expressions = self._parse_csv( 5786 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5787 ) 5788 5789 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5790 self.raise_error("Expected ]") 5791 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5792 self.raise_error("Expected }") 5793 5794 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5795 if bracket_kind == TokenType.L_BRACE: 5796 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5797 elif not this: 5798 this = build_array_constructor( 5799 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5800 ) 5801 else: 5802 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5803 if constructor_type: 5804 return build_array_constructor( 5805 constructor_type, 5806 args=expressions, 5807 bracket_kind=bracket_kind, 5808 dialect=self.dialect, 5809 ) 5810 5811 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5812 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5813 5814 self._add_comments(this) 5815 return self._parse_bracket(this) 5816 5817 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5818 if self._match(TokenType.COLON): 5819 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5820 return this 5821 5822 def _parse_case(self) -> t.Optional[exp.Expression]: 5823 ifs = [] 5824 default = None 5825 5826 comments = self._prev_comments 5827 expression = self._parse_assignment() 5828 5829 while self._match(TokenType.WHEN): 5830 this = self._parse_assignment() 5831 self._match(TokenType.THEN) 5832 then = self._parse_assignment() 5833 ifs.append(self.expression(exp.If, this=this, true=then)) 5834 5835 if self._match(TokenType.ELSE): 5836 default = self._parse_assignment() 5837 5838 if not self._match(TokenType.END): 5839 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5840 default = exp.column("interval") 5841 else: 5842 self.raise_error("Expected END after CASE", self._prev) 5843 5844 return self.expression( 5845 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5846 ) 5847 5848 def _parse_if(self) -> t.Optional[exp.Expression]: 5849 if self._match(TokenType.L_PAREN): 5850 args = self._parse_csv(self._parse_assignment) 5851 this = self.validate_expression(exp.If.from_arg_list(args), args) 5852 self._match_r_paren() 5853 else: 5854 index = self._index - 1 5855 5856 if self.NO_PAREN_IF_COMMANDS and index == 0: 5857 
return self._parse_as_command(self._prev) 5858 5859 condition = self._parse_assignment() 5860 5861 if not condition: 5862 self._retreat(index) 5863 return None 5864 5865 self._match(TokenType.THEN) 5866 true = self._parse_assignment() 5867 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5868 self._match(TokenType.END) 5869 this = self.expression(exp.If, this=condition, true=true, false=false) 5870 5871 return this 5872 5873 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5874 if not self._match_text_seq("VALUE", "FOR"): 5875 self._retreat(self._index - 1) 5876 return None 5877 5878 return self.expression( 5879 exp.NextValueFor, 5880 this=self._parse_column(), 5881 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5882 ) 5883 5884 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5885 this = self._parse_function() or self._parse_var_or_string(upper=True) 5886 5887 if self._match(TokenType.FROM): 5888 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5889 5890 if not self._match(TokenType.COMMA): 5891 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5892 5893 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5894 5895 def _parse_gap_fill(self) -> exp.GapFill: 5896 self._match(TokenType.TABLE) 5897 this = self._parse_table() 5898 5899 self._match(TokenType.COMMA) 5900 args = [this, *self._parse_csv(self._parse_lambda)] 5901 5902 gap_fill = exp.GapFill.from_arg_list(args) 5903 return self.validate_expression(gap_fill, args) 5904 5905 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5906 this = self._parse_assignment() 5907 5908 if not self._match(TokenType.ALIAS): 5909 if self._match(TokenType.COMMA): 5910 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5911 5912 self.raise_error("Expected AS after CAST") 5913 5914 fmt = None 5915 to = self._parse_types() 5916 5917 if self._match(TokenType.FORMAT): 5918 fmt_string = self._parse_string() 5919 fmt = self._parse_at_time_zone(fmt_string) 5920 5921 if not to: 5922 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5923 if to.this in exp.DataType.TEMPORAL_TYPES: 5924 this = self.expression( 5925 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5926 this=this, 5927 format=exp.Literal.string( 5928 format_time( 5929 fmt_string.this if fmt_string else "", 5930 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5931 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5932 ) 5933 ), 5934 safe=safe, 5935 ) 5936 5937 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5938 this.set("zone", fmt.args["zone"]) 5939 return this 5940 elif not to: 5941 self.raise_error("Expected TYPE after CAST") 5942 elif isinstance(to, exp.Identifier): 5943 to = exp.DataType.build(to.name, udt=True) 5944 elif to.this == exp.DataType.Type.CHAR: 5945 if self._match(TokenType.CHARACTER_SET): 5946 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5947 5948 return self.expression( 5949 exp.Cast if strict else exp.TryCast, 5950 this=this, 5951 to=to, 5952 format=fmt, 5953 safe=safe, 5954 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5955 ) 5956 5957 def _parse_string_agg(self) -> exp.GroupConcat: 5958 if self._match(TokenType.DISTINCT): 5959 args: t.List[t.Optional[exp.Expression]] = [ 5960 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5961 ] 5962 if self._match(TokenType.COMMA): 5963 args.extend(self._parse_csv(self._parse_assignment)) 5964 else: 5965 args = self._parse_csv(self._parse_assignment) # type: ignore 5966 5967 if self._match_text_seq("ON", "OVERFLOW"): 5968 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 5969 if self._match_text_seq("ERROR"): 5970 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 5971 else: 5972 self._match_text_seq("TRUNCATE") 5973 on_overflow = self.expression( 5974 exp.OverflowTruncateBehavior, 5975 this=self._parse_string(), 5976 with_count=( 5977 self._match_text_seq("WITH", "COUNT") 5978 or not self._match_text_seq("WITHOUT", "COUNT") 5979 ), 5980 ) 5981 else: 5982 on_overflow = None 5983 5984 index = self._index 5985 if not self._match(TokenType.R_PAREN) and args: 5986 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5987 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5988 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5989 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5990 5991 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5992 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5993 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5994 if not self._match_text_seq("WITHIN", "GROUP"): 5995 self._retreat(index) 5996 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5997 5998 # The corresponding match_r_paren will be called in parse_function (caller) 5999 self._match_l_paren() 6000 6001 return self.expression( 6002 exp.GroupConcat, 6003 this=self._parse_order(this=seq_get(args, 0)), 6004 separator=seq_get(args, 1), 6005 on_overflow=on_overflow, 6006 ) 6007 6008 def _parse_convert( 6009 self, strict: bool, safe: t.Optional[bool] = None 6010 ) -> t.Optional[exp.Expression]: 6011 this = self._parse_bitwise() 6012 6013 if self._match(TokenType.USING): 6014 to: t.Optional[exp.Expression] = self.expression( 6015 exp.CharacterSet, this=self._parse_var() 6016 ) 6017 elif self._match(TokenType.COMMA): 6018 to = self._parse_types() 6019 else: 6020 to = None 6021 6022 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6023 6024 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6025 """ 6026 There are generally two variants of the DECODE function: 6027 6028 - DECODE(bin, charset) 6029 - DECODE(expression, search, result [, search, result] ... [, default]) 6030 6031 The second variant will always be parsed into a CASE expression. Note that NULL 6032 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6033 instead of relying on pattern matching. 
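        For example, `DECODE(x, 1, 'one', NULL, 'none', 'other')` is parsed
        roughly as:

            CASE
                WHEN x = 1 THEN 'one'
                WHEN x IS NULL THEN 'none'
                ELSE 'other'
            END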
6034 """ 6035 args = self._parse_csv(self._parse_assignment) 6036 6037 if len(args) < 3: 6038 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6039 6040 expression, *expressions = args 6041 if not expression: 6042 return None 6043 6044 ifs = [] 6045 for search, result in zip(expressions[::2], expressions[1::2]): 6046 if not search or not result: 6047 return None 6048 6049 if isinstance(search, exp.Literal): 6050 ifs.append( 6051 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6052 ) 6053 elif isinstance(search, exp.Null): 6054 ifs.append( 6055 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6056 ) 6057 else: 6058 cond = exp.or_( 6059 exp.EQ(this=expression.copy(), expression=search), 6060 exp.and_( 6061 exp.Is(this=expression.copy(), expression=exp.Null()), 6062 exp.Is(this=search.copy(), expression=exp.Null()), 6063 copy=False, 6064 ), 6065 copy=False, 6066 ) 6067 ifs.append(exp.If(this=cond, true=result)) 6068 6069 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6070 6071 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6072 self._match_text_seq("KEY") 6073 key = self._parse_column() 6074 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6075 self._match_text_seq("VALUE") 6076 value = self._parse_bitwise() 6077 6078 if not key and not value: 6079 return None 6080 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6081 6082 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6083 if not this or not self._match_text_seq("FORMAT", "JSON"): 6084 return this 6085 6086 return self.expression(exp.FormatJson, this=this) 6087 6088 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6089 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6090 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6091 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6092 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6093 else: 6094 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6095 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6096 6097 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6098 6099 if not empty and not error and not null: 6100 return None 6101 6102 return self.expression( 6103 exp.OnCondition, 6104 empty=empty, 6105 error=error, 6106 null=null, 6107 ) 6108 6109 def _parse_on_handling( 6110 self, on: str, *values: str 6111 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6112 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6113 for value in values: 6114 if self._match_text_seq(value, "ON", on): 6115 return f"{value} ON {on}" 6116 6117 index = self._index 6118 if self._match(TokenType.DEFAULT): 6119 default_value = self._parse_bitwise() 6120 if self._match_text_seq("ON", on): 6121 return default_value 6122 6123 self._retreat(index) 6124 6125 return None 6126 6127 @t.overload 6128 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6129 6130 @t.overload 6131 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6132 6133 def _parse_json_object(self, agg=False): 6134 star = self._parse_star() 6135 expressions = ( 6136 [star] 6137 if star 6138 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6139 ) 6140 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6141 6142 unique_keys = None 6143 if self._match_text_seq("WITH", "UNIQUE"): 6144 unique_keys = True 6145 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6146 unique_keys = False 6147 6148 self._match_text_seq("KEYS") 6149 6150 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6151 self._parse_type() 6152 ) 6153 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6154 6155 return self.expression( 6156 exp.JSONObjectAgg if agg else exp.JSONObject, 6157 expressions=expressions, 6158 null_handling=null_handling, 6159 unique_keys=unique_keys, 6160 return_type=return_type, 6161 encoding=encoding, 6162 ) 6163 6164 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6165 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6166 if not self._match_text_seq("NESTED"): 6167 this = self._parse_id_var() 6168 kind = self._parse_types(allow_identifiers=False) 6169 nested = None 6170 else: 6171 this = None 6172 kind = None 6173 nested = True 6174 6175 path = self._match_text_seq("PATH") and self._parse_string() 6176 nested_schema = nested and self._parse_json_schema() 6177 6178 return self.expression( 6179 exp.JSONColumnDef, 6180 this=this, 6181 kind=kind, 6182 path=path, 6183 nested_schema=nested_schema, 6184 ) 6185 6186 def _parse_json_schema(self) -> exp.JSONSchema: 6187 self._match_text_seq("COLUMNS") 6188 return self.expression( 6189 exp.JSONSchema, 6190 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6191 ) 6192 6193 def _parse_json_table(self) -> exp.JSONTable: 6194 this = self._parse_format_json(self._parse_bitwise()) 6195 path = self._match(TokenType.COMMA) and self._parse_string() 6196 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6197 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6198 schema = self._parse_json_schema() 6199 6200 return exp.JSONTable( 6201 this=this, 6202 schema=schema, 6203 path=path, 6204 error_handling=error_handling, 6205 empty_handling=empty_handling, 6206 ) 6207 6208 def _parse_match_against(self) -> exp.MatchAgainst: 6209 expressions = self._parse_csv(self._parse_column) 6210 6211 self._match_text_seq(")", "AGAINST", "(") 6212 6213 this = self._parse_string() 6214 6215 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6216 modifier = "IN NATURAL LANGUAGE MODE" 6217 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6218 modifier = f"{modifier} WITH QUERY EXPANSION" 6219 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6220 modifier = "IN BOOLEAN MODE" 6221 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6222 modifier = "WITH QUERY EXPANSION" 6223 else: 6224 modifier = None 6225 6226 return self.expression( 6227 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6228 ) 6229 6230 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6231 def _parse_open_json(self) -> exp.OpenJSON: 6232 this = self._parse_bitwise() 6233 path = self._match(TokenType.COMMA) and self._parse_string() 6234 6235 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6236 this = self._parse_field(any_token=True) 6237 kind = self._parse_types() 6238 path = 
self._parse_string() 6239 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6240 6241 return self.expression( 6242 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6243 ) 6244 6245 expressions = None 6246 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6247 self._match_l_paren() 6248 expressions = self._parse_csv(_parse_open_json_column_def) 6249 6250 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6251 6252 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6253 args = self._parse_csv(self._parse_bitwise) 6254 6255 if self._match(TokenType.IN): 6256 return self.expression( 6257 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6258 ) 6259 6260 if haystack_first: 6261 haystack = seq_get(args, 0) 6262 needle = seq_get(args, 1) 6263 else: 6264 needle = seq_get(args, 0) 6265 haystack = seq_get(args, 1) 6266 6267 return self.expression( 6268 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6269 ) 6270 6271 def _parse_predict(self) -> exp.Predict: 6272 self._match_text_seq("MODEL") 6273 this = self._parse_table() 6274 6275 self._match(TokenType.COMMA) 6276 self._match_text_seq("TABLE") 6277 6278 return self.expression( 6279 exp.Predict, 6280 this=this, 6281 expression=self._parse_table(), 6282 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6283 ) 6284 6285 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6286 args = self._parse_csv(self._parse_table) 6287 return exp.JoinHint(this=func_name.upper(), expressions=args) 6288 6289 def _parse_substring(self) -> exp.Substring: 6290 # Postgres supports the form: substring(string [from int] [for int]) 6291 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6292 6293 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6294 6295 if self._match(TokenType.FROM): 6296 args.append(self._parse_bitwise()) 6297 if self._match(TokenType.FOR): 6298 if len(args) == 1: 6299 args.append(exp.Literal.number(1)) 6300 args.append(self._parse_bitwise()) 6301 6302 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6303 6304 def _parse_trim(self) -> exp.Trim: 6305 # https://www.w3resource.com/sql/character-functions/trim.php 6306 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6307 6308 position = None 6309 collation = None 6310 expression = None 6311 6312 if self._match_texts(self.TRIM_TYPES): 6313 position = self._prev.text.upper() 6314 6315 this = self._parse_bitwise() 6316 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6317 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6318 expression = self._parse_bitwise() 6319 6320 if invert_order: 6321 this, expression = expression, this 6322 6323 if self._match(TokenType.COLLATE): 6324 collation = self._parse_bitwise() 6325 6326 return self.expression( 6327 exp.Trim, this=this, position=position, expression=expression, collation=collation 6328 ) 6329 6330 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6331 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6332 6333 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6334 return self._parse_window(self._parse_id_var(), alias=True) 6335 6336 def _parse_respect_or_ignore_nulls( 6337 self, this: t.Optional[exp.Expression] 6338 ) -> t.Optional[exp.Expression]: 6339 if self._match_text_seq("IGNORE", "NULLS"): 
6340 return self.expression(exp.IgnoreNulls, this=this) 6341 if self._match_text_seq("RESPECT", "NULLS"): 6342 return self.expression(exp.RespectNulls, this=this) 6343 return this 6344 6345 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6346 if self._match(TokenType.HAVING): 6347 self._match_texts(("MAX", "MIN")) 6348 max = self._prev.text.upper() != "MIN" 6349 return self.expression( 6350 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6351 ) 6352 6353 return this 6354 6355 def _parse_window( 6356 self, this: t.Optional[exp.Expression], alias: bool = False 6357 ) -> t.Optional[exp.Expression]: 6358 func = this 6359 comments = func.comments if isinstance(func, exp.Expression) else None 6360 6361 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6362 self._match(TokenType.WHERE) 6363 this = self.expression( 6364 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6365 ) 6366 self._match_r_paren() 6367 6368 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6369 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6370 if self._match_text_seq("WITHIN", "GROUP"): 6371 order = self._parse_wrapped(self._parse_order) 6372 this = self.expression(exp.WithinGroup, this=this, expression=order) 6373 6374 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6375 # Some dialects choose to implement and some do not. 6376 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6377 6378 # There is some code above in _parse_lambda that handles 6379 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6380 6381 # The below changes handle 6382 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6383 6384 # Oracle allows both formats 6385 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6386 # and Snowflake chose to do the same for familiarity 6387 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6388 if isinstance(this, exp.AggFunc): 6389 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6390 6391 if ignore_respect and ignore_respect is not this: 6392 ignore_respect.replace(ignore_respect.this) 6393 this = self.expression(ignore_respect.__class__, this=this) 6394 6395 this = self._parse_respect_or_ignore_nulls(this) 6396 6397 # bigquery select from window x AS (partition by ...) 
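    # e.g. both placements below end up as IgnoreNulls wrapping the window function:
    #   FIRST_VALUE(t.col IGNORE NULLS) OVER (ORDER BY t.id)
    #   FIRST_VALUE(t.col) IGNORE NULLS OVER (ORDER BY t.id)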
6398 if alias: 6399 over = None 6400 self._match(TokenType.ALIAS) 6401 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6402 return this 6403 else: 6404 over = self._prev.text.upper() 6405 6406 if comments and isinstance(func, exp.Expression): 6407 func.pop_comments() 6408 6409 if not self._match(TokenType.L_PAREN): 6410 return self.expression( 6411 exp.Window, 6412 comments=comments, 6413 this=this, 6414 alias=self._parse_id_var(False), 6415 over=over, 6416 ) 6417 6418 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6419 6420 first = self._match(TokenType.FIRST) 6421 if self._match_text_seq("LAST"): 6422 first = False 6423 6424 partition, order = self._parse_partition_and_order() 6425 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6426 6427 if kind: 6428 self._match(TokenType.BETWEEN) 6429 start = self._parse_window_spec() 6430 self._match(TokenType.AND) 6431 end = self._parse_window_spec() 6432 6433 spec = self.expression( 6434 exp.WindowSpec, 6435 kind=kind, 6436 start=start["value"], 6437 start_side=start["side"], 6438 end=end["value"], 6439 end_side=end["side"], 6440 ) 6441 else: 6442 spec = None 6443 6444 self._match_r_paren() 6445 6446 window = self.expression( 6447 exp.Window, 6448 comments=comments, 6449 this=this, 6450 partition_by=partition, 6451 order=order, 6452 spec=spec, 6453 alias=window_alias, 6454 over=over, 6455 first=first, 6456 ) 6457 6458 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6459 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6460 return self._parse_window(window, alias=alias) 6461 6462 return window 6463 6464 def _parse_partition_and_order( 6465 self, 6466 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6467 return self._parse_partition_by(), self._parse_order() 6468 6469 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6470 self._match(TokenType.BETWEEN) 6471 6472 return { 6473 "value": ( 6474 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6475 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6476 or self._parse_bitwise() 6477 ), 6478 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6479 } 6480 6481 def _parse_alias( 6482 self, this: t.Optional[exp.Expression], explicit: bool = False 6483 ) -> t.Optional[exp.Expression]: 6484 any_token = self._match(TokenType.ALIAS) 6485 comments = self._prev_comments or [] 6486 6487 if explicit and not any_token: 6488 return this 6489 6490 if self._match(TokenType.L_PAREN): 6491 aliases = self.expression( 6492 exp.Aliases, 6493 comments=comments, 6494 this=this, 6495 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6496 ) 6497 self._match_r_paren(aliases) 6498 return aliases 6499 6500 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6501 self.STRING_ALIASES and self._parse_string_as_identifier() 6502 ) 6503 6504 if alias: 6505 comments.extend(alias.pop_comments()) 6506 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6507 column = this.this 6508 6509 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6510 if not this.comments and column and column.comments: 6511 this.comments = column.pop_comments() 6512 6513 return this 6514 6515 def _parse_id_var( 6516 self, 6517 any_token: bool = True, 6518 tokens: t.Optional[t.Collection[TokenType]] = None, 6519 ) -> t.Optional[exp.Expression]: 6520 expression = self._parse_identifier() 6521 if 
not expression and ( 6522 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6523 ): 6524 quoted = self._prev.token_type == TokenType.STRING 6525 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6526 6527 return expression 6528 6529 def _parse_string(self) -> t.Optional[exp.Expression]: 6530 if self._match_set(self.STRING_PARSERS): 6531 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6532 return self._parse_placeholder() 6533 6534 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6535 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6536 6537 def _parse_number(self) -> t.Optional[exp.Expression]: 6538 if self._match_set(self.NUMERIC_PARSERS): 6539 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6540 return self._parse_placeholder() 6541 6542 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6543 if self._match(TokenType.IDENTIFIER): 6544 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6545 return self._parse_placeholder() 6546 6547 def _parse_var( 6548 self, 6549 any_token: bool = False, 6550 tokens: t.Optional[t.Collection[TokenType]] = None, 6551 upper: bool = False, 6552 ) -> t.Optional[exp.Expression]: 6553 if ( 6554 (any_token and self._advance_any()) 6555 or self._match(TokenType.VAR) 6556 or (self._match_set(tokens) if tokens else False) 6557 ): 6558 return self.expression( 6559 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6560 ) 6561 return self._parse_placeholder() 6562 6563 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6564 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6565 self._advance() 6566 return self._prev 6567 return None 6568 6569 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6570 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6571 6572 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6573 return self._parse_primary() or self._parse_var(any_token=True) 6574 6575 def _parse_null(self) -> t.Optional[exp.Expression]: 6576 if self._match_set(self.NULL_TOKENS): 6577 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6578 return self._parse_placeholder() 6579 6580 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6581 if self._match(TokenType.TRUE): 6582 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6583 if self._match(TokenType.FALSE): 6584 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6585 return self._parse_placeholder() 6586 6587 def _parse_star(self) -> t.Optional[exp.Expression]: 6588 if self._match(TokenType.STAR): 6589 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6590 return self._parse_placeholder() 6591 6592 def _parse_parameter(self) -> exp.Parameter: 6593 this = self._parse_identifier() or self._parse_primary_or_var() 6594 return self.expression(exp.Parameter, this=this) 6595 6596 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6597 if self._match_set(self.PLACEHOLDER_PARSERS): 6598 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6599 if placeholder: 6600 return placeholder 6601 self._advance(-1) 6602 return None 6603 6604 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6605 if not self._match_texts(keywords): 6606 return None 6607 if self._match(TokenType.L_PAREN, 
advance=False): 6608 return self._parse_wrapped_csv(self._parse_expression) 6609 6610 expression = self._parse_expression() 6611 return [expression] if expression else None 6612 6613 def _parse_csv( 6614 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6615 ) -> t.List[exp.Expression]: 6616 parse_result = parse_method() 6617 items = [parse_result] if parse_result is not None else [] 6618 6619 while self._match(sep): 6620 self._add_comments(parse_result) 6621 parse_result = parse_method() 6622 if parse_result is not None: 6623 items.append(parse_result) 6624 6625 return items 6626 6627 def _parse_tokens( 6628 self, parse_method: t.Callable, expressions: t.Dict 6629 ) -> t.Optional[exp.Expression]: 6630 this = parse_method() 6631 6632 while self._match_set(expressions): 6633 this = self.expression( 6634 expressions[self._prev.token_type], 6635 this=this, 6636 comments=self._prev_comments, 6637 expression=parse_method(), 6638 ) 6639 6640 return this 6641 6642 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6643 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6644 6645 def _parse_wrapped_csv( 6646 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6647 ) -> t.List[exp.Expression]: 6648 return self._parse_wrapped( 6649 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6650 ) 6651 6652 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6653 wrapped = self._match(TokenType.L_PAREN) 6654 if not wrapped and not optional: 6655 self.raise_error("Expecting (") 6656 parse_result = parse_method() 6657 if wrapped: 6658 self._match_r_paren() 6659 return parse_result 6660 6661 def _parse_expressions(self) -> t.List[exp.Expression]: 6662 return self._parse_csv(self._parse_expression) 6663 6664 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6665 return self._parse_select() or self._parse_set_operations( 6666 self._parse_expression() if alias else self._parse_assignment() 6667 ) 6668 6669 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6670 return self._parse_query_modifiers( 6671 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6672 ) 6673 6674 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6675 this = None 6676 if self._match_texts(self.TRANSACTION_KIND): 6677 this = self._prev.text 6678 6679 self._match_texts(("TRANSACTION", "WORK")) 6680 6681 modes = [] 6682 while True: 6683 mode = [] 6684 while self._match(TokenType.VAR): 6685 mode.append(self._prev.text) 6686 6687 if mode: 6688 modes.append(" ".join(mode)) 6689 if not self._match(TokenType.COMMA): 6690 break 6691 6692 return self.expression(exp.Transaction, this=this, modes=modes) 6693 6694 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6695 chain = None 6696 savepoint = None 6697 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6698 6699 self._match_texts(("TRANSACTION", "WORK")) 6700 6701 if self._match_text_seq("TO"): 6702 self._match_text_seq("SAVEPOINT") 6703 savepoint = self._parse_id_var() 6704 6705 if self._match(TokenType.AND): 6706 chain = not self._match_text_seq("NO") 6707 self._match_text_seq("CHAIN") 6708 6709 if is_rollback: 6710 return self.expression(exp.Rollback, savepoint=savepoint) 6711 6712 return self.expression(exp.Commit, chain=chain) 6713 6714 def _parse_refresh(self) -> exp.Refresh: 6715 self._match(TokenType.TABLE) 6716 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6717 6718 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6719 if not self._match_text_seq("ADD"): 6720 return None 6721 6722 self._match(TokenType.COLUMN) 6723 exists_column = self._parse_exists(not_=True) 6724 expression = self._parse_field_def() 6725 6726 if expression: 6727 expression.set("exists", exists_column) 6728 6729 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6730 if self._match_texts(("FIRST", "AFTER")): 6731 position = self._prev.text 6732 column_position = self.expression( 6733 exp.ColumnPosition, this=self._parse_column(), position=position 6734 ) 6735 expression.set("position", column_position) 6736 6737 return expression 6738 6739 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6740 drop = self._match(TokenType.DROP) and self._parse_drop() 6741 if drop and not isinstance(drop, exp.Command): 6742 drop.set("kind", drop.args.get("kind", "COLUMN")) 6743 return drop 6744 6745 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6746 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6747 return self.expression( 6748 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6749 ) 6750 6751 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6752 index = self._index - 1 6753 6754 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6755 return self._parse_csv( 6756 lambda: self.expression( 6757 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6758 ) 6759 ) 6760 6761 self._retreat(index) 6762 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6763 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6764 6765 if self._match_text_seq("ADD", "COLUMNS"): 6766 schema = self._parse_schema() 6767 if schema: 6768 return [schema] 6769 return [] 6770 6771 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6772 6773 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6774 if self._match_texts(self.ALTER_ALTER_PARSERS): 6775 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6776 6777 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6778 # keyword after ALTER we default to parsing this statement 6779 self._match(TokenType.COLUMN) 6780 column = self._parse_field(any_token=True) 6781 6782 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6783 return self.expression(exp.AlterColumn, this=column, drop=True) 6784 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6785 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6786 if self._match(TokenType.COMMENT): 6787 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6788 if self._match_text_seq("DROP", "NOT", "NULL"): 6789 return self.expression( 6790 exp.AlterColumn, 6791 this=column, 6792 drop=True, 6793 allow_null=True, 6794 ) 6795 if self._match_text_seq("SET", "NOT", "NULL"): 6796 return self.expression( 6797 exp.AlterColumn, 6798 this=column, 6799 allow_null=False, 6800 ) 6801 self._match_text_seq("SET", "DATA") 6802 self._match_text_seq("TYPE") 6803 return self.expression( 6804 exp.AlterColumn, 6805 this=column, 6806 dtype=self._parse_types(), 6807 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6808 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6809 ) 6810 6811 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6812 if self._match_texts(("ALL", "EVEN", "AUTO")): 6813 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6814 6815 self._match_text_seq("KEY", "DISTKEY") 6816 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6817 6818 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6819 if compound: 6820 self._match_text_seq("SORTKEY") 6821 6822 if self._match(TokenType.L_PAREN, advance=False): 6823 return self.expression( 6824 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6825 ) 6826 6827 self._match_texts(("AUTO", "NONE")) 6828 return self.expression( 6829 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6830 ) 6831 6832 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6833 index = self._index - 1 6834 6835 partition_exists = self._parse_exists() 6836 if self._match(TokenType.PARTITION, advance=False): 6837 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6838 6839 self._retreat(index) 6840 return self._parse_csv(self._parse_drop_column) 6841 6842 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6843 if self._match(TokenType.COLUMN): 6844 exists = self._parse_exists() 6845 old_column = self._parse_column() 6846 to = self._match_text_seq("TO") 6847 new_column = self._parse_column() 6848 6849 if old_column is None or to is None or new_column is None: 6850 return None 6851 6852 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6853 6854 self._match_text_seq("TO") 6855 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6856 6857 def _parse_alter_table_set(self) -> exp.AlterSet: 6858 alter_set = self.expression(exp.AlterSet) 6859 6860 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6861 "TABLE", "PROPERTIES" 6862 ): 6863 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6864 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6865 alter_set.set("expressions", [self._parse_assignment()]) 6866 elif self._match_texts(("LOGGED", "UNLOGGED")): 6867 alter_set.set("option", exp.var(self._prev.text.upper())) 6868 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6869 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6870 elif self._match_text_seq("LOCATION"): 6871 alter_set.set("location", self._parse_field()) 6872 elif self._match_text_seq("ACCESS", "METHOD"): 6873 alter_set.set("access_method", self._parse_field()) 6874 elif self._match_text_seq("TABLESPACE"): 6875 alter_set.set("tablespace", self._parse_field()) 6876 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6877 alter_set.set("file_format", [self._parse_field()]) 6878 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6879 alter_set.set("file_format", self._parse_wrapped_options()) 6880 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6881 alter_set.set("copy_options", self._parse_wrapped_options()) 6882 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6883 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6884 else: 6885 if self._match_text_seq("SERDE"): 6886 alter_set.set("serde", self._parse_field()) 6887 6888 alter_set.set("expressions", [self._parse_properties()]) 6889 6890 return 
alter_set 6891 6892 def _parse_alter(self) -> exp.Alter | exp.Command: 6893 start = self._prev 6894 6895 alter_token = self._match_set(self.ALTERABLES) and self._prev 6896 if not alter_token: 6897 return self._parse_as_command(start) 6898 6899 exists = self._parse_exists() 6900 only = self._match_text_seq("ONLY") 6901 this = self._parse_table(schema=True) 6902 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6903 6904 if self._next: 6905 self._advance() 6906 6907 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6908 if parser: 6909 actions = ensure_list(parser(self)) 6910 not_valid = self._match_text_seq("NOT", "VALID") 6911 options = self._parse_csv(self._parse_property) 6912 6913 if not self._curr and actions: 6914 return self.expression( 6915 exp.Alter, 6916 this=this, 6917 kind=alter_token.text.upper(), 6918 exists=exists, 6919 actions=actions, 6920 only=only, 6921 options=options, 6922 cluster=cluster, 6923 not_valid=not_valid, 6924 ) 6925 6926 return self._parse_as_command(start) 6927 6928 def _parse_merge(self) -> exp.Merge: 6929 self._match(TokenType.INTO) 6930 target = self._parse_table() 6931 6932 if target and self._match(TokenType.ALIAS, advance=False): 6933 target.set("alias", self._parse_table_alias()) 6934 6935 self._match(TokenType.USING) 6936 using = self._parse_table() 6937 6938 self._match(TokenType.ON) 6939 on = self._parse_assignment() 6940 6941 return self.expression( 6942 exp.Merge, 6943 this=target, 6944 using=using, 6945 on=on, 6946 expressions=self._parse_when_matched(), 6947 returning=self._parse_returning(), 6948 ) 6949 6950 def _parse_when_matched(self) -> t.List[exp.When]: 6951 whens = [] 6952 6953 while self._match(TokenType.WHEN): 6954 matched = not self._match(TokenType.NOT) 6955 self._match_text_seq("MATCHED") 6956 source = ( 6957 False 6958 if self._match_text_seq("BY", "TARGET") 6959 else self._match_text_seq("BY", "SOURCE") 6960 ) 6961 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6962 6963 self._match(TokenType.THEN) 6964 6965 if self._match(TokenType.INSERT): 6966 this = self._parse_star() 6967 if this: 6968 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6969 else: 6970 then = self.expression( 6971 exp.Insert, 6972 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6973 expression=self._match_text_seq("VALUES") and self._parse_value(), 6974 ) 6975 elif self._match(TokenType.UPDATE): 6976 expressions = self._parse_star() 6977 if expressions: 6978 then = self.expression(exp.Update, expressions=expressions) 6979 else: 6980 then = self.expression( 6981 exp.Update, 6982 expressions=self._match(TokenType.SET) 6983 and self._parse_csv(self._parse_equality), 6984 ) 6985 elif self._match(TokenType.DELETE): 6986 then = self.expression(exp.Var, this=self._prev.text) 6987 else: 6988 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6989 6990 whens.append( 6991 self.expression( 6992 exp.When, 6993 matched=matched, 6994 source=source, 6995 condition=condition, 6996 then=then, 6997 ) 6998 ) 6999 return whens 7000 7001 def _parse_show(self) -> t.Optional[exp.Expression]: 7002 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7003 if parser: 7004 return parser(self) 7005 return self._parse_as_command(self._prev) 7006 7007 def _parse_set_item_assignment( 7008 self, kind: t.Optional[str] = None 7009 ) -> t.Optional[exp.Expression]: 7010 index = self._index 7011 7012 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 7013 return self._parse_set_transaction(global_=kind == "GLOBAL") 7014 7015 left = self._parse_primary() or self._parse_column() 7016 assignment_delimiter = self._match_texts(("=", "TO")) 7017 7018 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7019 self._retreat(index) 7020 return None 7021 7022 right = self._parse_statement() or self._parse_id_var() 7023 if isinstance(right, (exp.Column, exp.Identifier)): 7024 right = exp.var(right.name) 7025 7026 this = self.expression(exp.EQ, this=left, expression=right) 7027 return self.expression(exp.SetItem, this=this, kind=kind) 7028 7029 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7030 self._match_text_seq("TRANSACTION") 7031 characteristics = self._parse_csv( 7032 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7033 ) 7034 return self.expression( 7035 exp.SetItem, 7036 expressions=characteristics, 7037 kind="TRANSACTION", 7038 **{"global": global_}, # type: ignore 7039 ) 7040 7041 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7042 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7043 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7044 7045 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7046 index = self._index 7047 set_ = self.expression( 7048 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7049 ) 7050 7051 if self._curr: 7052 self._retreat(index) 7053 return self._parse_as_command(self._prev) 7054 7055 return set_ 7056 7057 def _parse_var_from_options( 7058 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7059 ) -> t.Optional[exp.Var]: 7060 start = self._curr 7061 if not start: 7062 return None 7063 7064 option = start.text.upper() 7065 continuations = options.get(option) 7066 7067 index = self._index 7068 self._advance() 7069 for keywords in continuations or []: 7070 if isinstance(keywords, str): 7071 keywords = (keywords,) 7072 7073 if self._match_text_seq(*keywords): 7074 option = f"{option} {' '.join(keywords)}" 7075 break 7076 else: 7077 if continuations or continuations is None: 7078 if raise_unmatched: 7079 self.raise_error(f"Unknown option {option}") 7080 7081 self._retreat(index) 7082 return None 7083 7084 return exp.var(option) 7085 7086 def _parse_as_command(self, start: Token) -> exp.Command: 7087 while self._curr: 7088 self._advance() 7089 text = self._find_sql(start, self._prev) 7090 size = len(start.text) 7091 self._warn_unsupported() 7092 return exp.Command(this=text[:size], expression=text[size:]) 7093 7094 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7095 settings = [] 7096 7097 self._match_l_paren() 7098 kind = self._parse_id_var() 7099 7100 if self._match(TokenType.L_PAREN): 7101 while True: 7102 key = self._parse_id_var() 7103 value = self._parse_primary() 7104 7105 if not key and value is None: 7106 break 7107 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7108 self._match(TokenType.R_PAREN) 7109 7110 self._match_r_paren() 7111 7112 return self.expression( 7113 exp.DictProperty, 7114 this=this, 7115 kind=kind.this if kind else None, 7116 settings=settings, 7117 ) 7118 7119 def _parse_dict_range(self, this: str) -> exp.DictRange: 7120 self._match_l_paren() 7121 has_min = self._match_text_seq("MIN") 7122 if has_min: 7123 min = self._parse_var() or self._parse_primary() 7124 self._match_text_seq("MAX") 7125 max = 
self._parse_var() or self._parse_primary() 7126 else: 7127 max = self._parse_var() or self._parse_primary() 7128 min = exp.Literal.number(0) 7129 self._match_r_paren() 7130 return self.expression(exp.DictRange, this=this, min=min, max=max) 7131 7132 def _parse_comprehension( 7133 self, this: t.Optional[exp.Expression] 7134 ) -> t.Optional[exp.Comprehension]: 7135 index = self._index 7136 expression = self._parse_column() 7137 if not self._match(TokenType.IN): 7138 self._retreat(index - 1) 7139 return None 7140 iterator = self._parse_column() 7141 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7142 return self.expression( 7143 exp.Comprehension, 7144 this=this, 7145 expression=expression, 7146 iterator=iterator, 7147 condition=condition, 7148 ) 7149 7150 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7151 if self._match(TokenType.HEREDOC_STRING): 7152 return self.expression(exp.Heredoc, this=self._prev.text) 7153 7154 if not self._match_text_seq("$"): 7155 return None 7156 7157 tags = ["$"] 7158 tag_text = None 7159 7160 if self._is_connected(): 7161 self._advance() 7162 tags.append(self._prev.text.upper()) 7163 else: 7164 self.raise_error("No closing $ found") 7165 7166 if tags[-1] != "$": 7167 if self._is_connected() and self._match_text_seq("$"): 7168 tag_text = tags[-1] 7169 tags.append("$") 7170 else: 7171 self.raise_error("No closing $ found") 7172 7173 heredoc_start = self._curr 7174 7175 while self._curr: 7176 if self._match_text_seq(*tags, advance=False): 7177 this = self._find_sql(heredoc_start, self._prev) 7178 self._advance(len(tags)) 7179 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7180 7181 self._advance() 7182 7183 self.raise_error(f"No closing {''.join(tags)} found") 7184 return None 7185 7186 def _find_parser( 7187 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7188 ) -> t.Optional[t.Callable]: 7189 if not self._curr: 7190 return None 7191 7192 index = self._index 7193 this = [] 7194 while True: 7195 # The current token might be multiple words 7196 curr = self._curr.text.upper() 7197 key = curr.split(" ") 7198 this.append(curr) 7199 7200 self._advance() 7201 result, trie = in_trie(trie, key) 7202 if result == TrieResult.FAILED: 7203 break 7204 7205 if result == TrieResult.EXISTS: 7206 subparser = parsers[" ".join(this)] 7207 return subparser 7208 7209 self._retreat(index) 7210 return None 7211 7212 def _match(self, token_type, advance=True, expression=None): 7213 if not self._curr: 7214 return None 7215 7216 if self._curr.token_type == token_type: 7217 if advance: 7218 self._advance() 7219 self._add_comments(expression) 7220 return True 7221 7222 return None 7223 7224 def _match_set(self, types, advance=True): 7225 if not self._curr: 7226 return None 7227 7228 if self._curr.token_type in types: 7229 if advance: 7230 self._advance() 7231 return True 7232 7233 return None 7234 7235 def _match_pair(self, token_type_a, token_type_b, advance=True): 7236 if not self._curr or not self._next: 7237 return None 7238 7239 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7240 if advance: 7241 self._advance(2) 7242 return True 7243 7244 return None 7245 7246 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7247 if not self._match(TokenType.L_PAREN, expression=expression): 7248 self.raise_error("Expecting (") 7249 7250 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7251 if not self._match(TokenType.R_PAREN, expression=expression): 
7252 self.raise_error("Expecting )") 7253 7254 def _match_texts(self, texts, advance=True): 7255 if ( 7256 self._curr 7257 and self._curr.token_type != TokenType.STRING 7258 and self._curr.text.upper() in texts 7259 ): 7260 if advance: 7261 self._advance() 7262 return True 7263 return None 7264 7265 def _match_text_seq(self, *texts, advance=True): 7266 index = self._index 7267 for text in texts: 7268 if ( 7269 self._curr 7270 and self._curr.token_type != TokenType.STRING 7271 and self._curr.text.upper() == text 7272 ): 7273 self._advance() 7274 else: 7275 self._retreat(index) 7276 return None 7277 7278 if not advance: 7279 self._retreat(index) 7280 7281 return True 7282 7283 def _replace_lambda( 7284 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7285 ) -> t.Optional[exp.Expression]: 7286 if not node: 7287 return node 7288 7289 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7290 7291 for column in node.find_all(exp.Column): 7292 typ = lambda_types.get(column.parts[0].name) 7293 if typ is not None: 7294 dot_or_id = column.to_dot() if column.table else column.this 7295 7296 if typ: 7297 dot_or_id = self.expression( 7298 exp.Cast, 7299 this=dot_or_id, 7300 to=typ, 7301 ) 7302 7303 parent = column.parent 7304 7305 while isinstance(parent, exp.Dot): 7306 if not isinstance(parent.parent, exp.Dot): 7307 parent.replace(dot_or_id) 7308 break 7309 parent = parent.parent 7310 else: 7311 if column is node: 7312 node = dot_or_id 7313 else: 7314 column.replace(dot_or_id) 7315 return node 7316 7317 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7318 start = self._prev 7319 7320 # Not to be confused with TRUNCATE(number, decimals) function call 7321 if self._match(TokenType.L_PAREN): 7322 self._retreat(self._index - 2) 7323 return self._parse_function() 7324 7325 # Clickhouse supports TRUNCATE DATABASE as well 7326 is_database = self._match(TokenType.DATABASE) 7327 7328 self._match(TokenType.TABLE) 7329 7330 exists = self._parse_exists(not_=False) 7331 7332 expressions = self._parse_csv( 7333 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7334 ) 7335 7336 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7337 7338 if self._match_text_seq("RESTART", "IDENTITY"): 7339 identity = "RESTART" 7340 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7341 identity = "CONTINUE" 7342 else: 7343 identity = None 7344 7345 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7346 option = self._prev.text 7347 else: 7348 option = None 7349 7350 partition = self._parse_partition() 7351 7352 # Fallback case 7353 if self._curr: 7354 return self._parse_as_command(start) 7355 7356 return self.expression( 7357 exp.TruncateTable, 7358 expressions=expressions, 7359 is_database=is_database, 7360 exists=exists, 7361 cluster=cluster, 7362 identity=identity, 7363 option=option, 7364 partition=partition, 7365 ) 7366 7367 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7368 this = self._parse_ordered(self._parse_opclass) 7369 7370 if not self._match(TokenType.WITH): 7371 return this 7372 7373 op = self._parse_var(any_token=True) 7374 7375 return self.expression(exp.WithOperator, this=this, op=op) 7376 7377 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7378 self._match(TokenType.EQ) 7379 self._match(TokenType.L_PAREN) 7380 7381 opts: t.List[t.Optional[exp.Expression]] = [] 7382 while self._curr and not self._match(TokenType.R_PAREN): 7383 if 
self._match_text_seq("FORMAT_NAME", "="): 7384 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7385 # so we parse it separately to use _parse_field() 7386 prop = self.expression( 7387 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7388 ) 7389 opts.append(prop) 7390 else: 7391 opts.append(self._parse_property()) 7392 7393 self._match(TokenType.COMMA) 7394 7395 return opts 7396 7397 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7398 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7399 7400 options = [] 7401 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7402 option = self._parse_var(any_token=True) 7403 prev = self._prev.text.upper() 7404 7405 # Different dialects might separate options and values by white space, "=" and "AS" 7406 self._match(TokenType.EQ) 7407 self._match(TokenType.ALIAS) 7408 7409 param = self.expression(exp.CopyParameter, this=option) 7410 7411 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7412 TokenType.L_PAREN, advance=False 7413 ): 7414 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7415 param.set("expressions", self._parse_wrapped_options()) 7416 elif prev == "FILE_FORMAT": 7417 # T-SQL's external file format case 7418 param.set("expression", self._parse_field()) 7419 else: 7420 param.set("expression", self._parse_unquoted_field()) 7421 7422 options.append(param) 7423 self._match(sep) 7424 7425 return options 7426 7427 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7428 expr = self.expression(exp.Credentials) 7429 7430 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7431 expr.set("storage", self._parse_field()) 7432 if self._match_text_seq("CREDENTIALS"): 7433 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7434 creds = ( 7435 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7436 ) 7437 expr.set("credentials", creds) 7438 if self._match_text_seq("ENCRYPTION"): 7439 expr.set("encryption", self._parse_wrapped_options()) 7440 if self._match_text_seq("IAM_ROLE"): 7441 expr.set("iam_role", self._parse_field()) 7442 if self._match_text_seq("REGION"): 7443 expr.set("region", self._parse_field()) 7444 7445 return expr 7446 7447 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7448 return self._parse_field() 7449 7450 def _parse_copy(self) -> exp.Copy | exp.Command: 7451 start = self._prev 7452 7453 self._match(TokenType.INTO) 7454 7455 this = ( 7456 self._parse_select(nested=True, parse_subquery_alias=False) 7457 if self._match(TokenType.L_PAREN, advance=False) 7458 else self._parse_table(schema=True) 7459 ) 7460 7461 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7462 7463 files = self._parse_csv(self._parse_file_location) 7464 credentials = self._parse_credentials() 7465 7466 self._match_text_seq("WITH") 7467 7468 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7469 7470 # Fallback case 7471 if self._curr: 7472 return self._parse_as_command(start) 7473 7474 return self.expression( 7475 exp.Copy, 7476 this=this, 7477 kind=kind, 7478 credentials=credentials, 7479 files=files, 7480 params=params, 7481 ) 7482 7483 def _parse_normalize(self) -> exp.Normalize: 7484 return self.expression( 7485 exp.Normalize, 7486 this=self._parse_bitwise(), 7487 form=self._match(TokenType.COMMA) and self._parse_var(), 7488 ) 7489 7490 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7491 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7492 this = self._parse_function() 7493 if isinstance(this, exp.Columns): 7494 this.set("unpack", True) 7495 return this 7496 7497 return self.expression( 7498 exp.Star, 7499 **{ # type: ignore 7500 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7501 "replace": self._parse_star_op("REPLACE"), 7502 "rename": self._parse_star_op("RENAME"), 7503 }, 7504 ) 7505 7506 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7507 privilege_parts = [] 7508 7509 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7510 # (end of privilege list) or L_PAREN (start of column list) are met 7511 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7512 privilege_parts.append(self._curr.text.upper()) 7513 self._advance() 7514 7515 this = exp.var(" ".join(privilege_parts)) 7516 expressions = ( 7517 self._parse_wrapped_csv(self._parse_column) 7518 if self._match(TokenType.L_PAREN, advance=False) 7519 else None 7520 ) 7521 7522 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7523 7524 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7525 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7526 principal = self._parse_id_var() 7527 7528 if not principal: 7529 return None 7530 7531 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7532 7533 def _parse_grant(self) -> exp.Grant | exp.Command: 7534 start = self._prev 7535 7536 privileges = self._parse_csv(self._parse_grant_privilege) 7537 7538 self._match(TokenType.ON) 7539 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7540 7541 # Attempt to parse the securable e.g. MySQL allows names 7542 # such as "foo.*", "*.*" which are not easily parseable yet 7543 securable = self._try_parse(self._parse_table_parts) 7544 7545 if not securable or not self._match_text_seq("TO"): 7546 return self._parse_as_command(start) 7547 7548 principals = self._parse_csv(self._parse_grant_principal) 7549 7550 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7551 7552 if self._curr: 7553 return self._parse_as_command(start) 7554 7555 return self.expression( 7556 exp.Grant, 7557 privileges=privileges, 7558 kind=kind, 7559 securable=securable, 7560 principals=principals, 7561 grant_option=grant_option, 7562 ) 7563 7564 def _parse_overlay(self) -> exp.Overlay: 7565 return self.expression( 7566 exp.Overlay, 7567 **{ # type: ignore 7568 "this": self._parse_bitwise(), 7569 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7570 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7571 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7572 }, 7573 )
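The methods above are easiest to understand from the outside in: raw SQL is tokenized, each statement is dispatched through STATEMENT_PARSERS, and anything a dialect cannot fully parse falls back to _parse_as_command, which wraps the remaining text in an exp.Command. Below is a minimal sketch of that round trip using the public Tokenizer/Parser entry points; the GRANT statement and the table/role names are illustrative, not taken from this module.

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "GRANT SELECT, INSERT ON TABLE db.t TO ROLE analyst"  # illustrative statement

    tokens = Tokenizer().tokenize(sql)        # raw SQL -> list of Token objects
    statements = Parser().parse(tokens, sql)  # one expression per parsed statement
    grant = statements[0]                     # should be an exp.Grant node, per _parse_grant above

    print(grant.sql())                        # generates SQL back from the syntax tree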
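The Parser class source below opens with the FUNCTIONS registry: each entry rewrites a recognized SQL function call into a typed expression node at parse time, often consulting dialect flags such as LOG_BASE_FIRST. A quick sketch of the effect, assuming the default dialect (the query text is illustrative):

    import sqlglot
    from sqlglot import exp

    # build_logarithm keeps the base as the first argument unless the dialect
    # sets LOG_BASE_FIRST = False, in which case the operands are swapped.
    log = sqlglot.parse_one("SELECT LOG(2, 64) AS x").find(exp.Log)
    assert log is not None
    print(log.this.sql(), log.expression.sql())  # -> 2 64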
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.DECIMAL256, 356 TokenType.UDECIMAL, 357 TokenType.BIGDECIMAL, 358 TokenType.UUID, 359 TokenType.GEOGRAPHY, 360 TokenType.GEOMETRY, 361 TokenType.POINT, 362 TokenType.RING, 363 TokenType.LINESTRING, 364 TokenType.MULTILINESTRING, 365 TokenType.POLYGON, 366 TokenType.MULTIPOLYGON, 367 TokenType.HLLSKETCH, 368 TokenType.HSTORE, 369 TokenType.PSEUDO_TYPE, 370 TokenType.SUPER, 371 TokenType.SERIAL, 372 TokenType.SMALLSERIAL, 373 TokenType.BIGSERIAL, 374 TokenType.XML, 375 TokenType.YEAR, 376 TokenType.UNIQUEIDENTIFIER, 377 TokenType.USERDEFINED, 378 TokenType.MONEY, 379 TokenType.SMALLMONEY, 380 TokenType.ROWVERSION, 381 TokenType.IMAGE, 382 TokenType.VARIANT, 383 TokenType.VECTOR, 384 TokenType.OBJECT, 385 TokenType.OBJECT_IDENTIFIER, 386 TokenType.INET, 387 TokenType.IPADDRESS, 388 TokenType.IPPREFIX, 389 TokenType.IPV4, 390 TokenType.IPV6, 391 
TokenType.UNKNOWN, 392 TokenType.NULL, 393 TokenType.NAME, 394 TokenType.TDIGEST, 395 *ENUM_TYPE_TOKENS, 396 *NESTED_TYPE_TOKENS, 397 *AGGREGATE_TYPE_TOKENS, 398 } 399 400 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 401 TokenType.BIGINT: TokenType.UBIGINT, 402 TokenType.INT: TokenType.UINT, 403 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 404 TokenType.SMALLINT: TokenType.USMALLINT, 405 TokenType.TINYINT: TokenType.UTINYINT, 406 TokenType.DECIMAL: TokenType.UDECIMAL, 407 } 408 409 SUBQUERY_PREDICATES = { 410 TokenType.ANY: exp.Any, 411 TokenType.ALL: exp.All, 412 TokenType.EXISTS: exp.Exists, 413 TokenType.SOME: exp.Any, 414 } 415 416 RESERVED_TOKENS = { 417 *Tokenizer.SINGLE_TOKENS.values(), 418 TokenType.SELECT, 419 } - {TokenType.IDENTIFIER} 420 421 DB_CREATABLES = { 422 TokenType.DATABASE, 423 TokenType.DICTIONARY, 424 TokenType.MODEL, 425 TokenType.SCHEMA, 426 TokenType.SEQUENCE, 427 TokenType.STORAGE_INTEGRATION, 428 TokenType.TABLE, 429 TokenType.TAG, 430 TokenType.VIEW, 431 TokenType.WAREHOUSE, 432 TokenType.STREAMLIT, 433 } 434 435 CREATABLES = { 436 TokenType.COLUMN, 437 TokenType.CONSTRAINT, 438 TokenType.FOREIGN_KEY, 439 TokenType.FUNCTION, 440 TokenType.INDEX, 441 TokenType.PROCEDURE, 442 *DB_CREATABLES, 443 } 444 445 ALTERABLES = { 446 TokenType.INDEX, 447 TokenType.TABLE, 448 TokenType.VIEW, 449 } 450 451 # Tokens that can represent identifiers 452 ID_VAR_TOKENS = { 453 TokenType.ALL, 454 TokenType.VAR, 455 TokenType.ANTI, 456 TokenType.APPLY, 457 TokenType.ASC, 458 TokenType.ASOF, 459 TokenType.AUTO_INCREMENT, 460 TokenType.BEGIN, 461 TokenType.BPCHAR, 462 TokenType.CACHE, 463 TokenType.CASE, 464 TokenType.COLLATE, 465 TokenType.COMMAND, 466 TokenType.COMMENT, 467 TokenType.COMMIT, 468 TokenType.CONSTRAINT, 469 TokenType.COPY, 470 TokenType.CUBE, 471 TokenType.DEFAULT, 472 TokenType.DELETE, 473 TokenType.DESC, 474 TokenType.DESCRIBE, 475 TokenType.DICTIONARY, 476 TokenType.DIV, 477 TokenType.END, 478 TokenType.EXECUTE, 479 TokenType.ESCAPE, 480 TokenType.FALSE, 481 TokenType.FIRST, 482 TokenType.FILTER, 483 TokenType.FINAL, 484 TokenType.FORMAT, 485 TokenType.FULL, 486 TokenType.IDENTIFIER, 487 TokenType.IS, 488 TokenType.ISNULL, 489 TokenType.INTERVAL, 490 TokenType.KEEP, 491 TokenType.KILL, 492 TokenType.LEFT, 493 TokenType.LOAD, 494 TokenType.MERGE, 495 TokenType.NATURAL, 496 TokenType.NEXT, 497 TokenType.OFFSET, 498 TokenType.OPERATOR, 499 TokenType.ORDINALITY, 500 TokenType.OVERLAPS, 501 TokenType.OVERWRITE, 502 TokenType.PARTITION, 503 TokenType.PERCENT, 504 TokenType.PIVOT, 505 TokenType.PRAGMA, 506 TokenType.RANGE, 507 TokenType.RECURSIVE, 508 TokenType.REFERENCES, 509 TokenType.REFRESH, 510 TokenType.RENAME, 511 TokenType.REPLACE, 512 TokenType.RIGHT, 513 TokenType.ROLLUP, 514 TokenType.ROW, 515 TokenType.ROWS, 516 TokenType.SEMI, 517 TokenType.SET, 518 TokenType.SETTINGS, 519 TokenType.SHOW, 520 TokenType.TEMPORARY, 521 TokenType.TOP, 522 TokenType.TRUE, 523 TokenType.TRUNCATE, 524 TokenType.UNIQUE, 525 TokenType.UNNEST, 526 TokenType.UNPIVOT, 527 TokenType.UPDATE, 528 TokenType.USE, 529 TokenType.VOLATILE, 530 TokenType.WINDOW, 531 *CREATABLES, 532 *SUBQUERY_PREDICATES, 533 *TYPE_TOKENS, 534 *NO_PAREN_FUNCTIONS, 535 } 536 ID_VAR_TOKENS.remove(TokenType.UNION) 537 538 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 539 540 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 541 TokenType.ANTI, 542 TokenType.APPLY, 543 TokenType.ASOF, 544 TokenType.FULL, 545 TokenType.LEFT, 546 TokenType.LOCK, 547 TokenType.NATURAL, 548 TokenType.OFFSET, 549 TokenType.RIGHT, 550 TokenType.SEMI, 551 
TokenType.WINDOW, 552 } 553 554 ALIAS_TOKENS = ID_VAR_TOKENS 555 556 ARRAY_CONSTRUCTORS = { 557 "ARRAY": exp.Array, 558 "LIST": exp.List, 559 } 560 561 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 562 563 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 564 565 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 566 567 FUNC_TOKENS = { 568 TokenType.COLLATE, 569 TokenType.COMMAND, 570 TokenType.CURRENT_DATE, 571 TokenType.CURRENT_DATETIME, 572 TokenType.CURRENT_TIMESTAMP, 573 TokenType.CURRENT_TIME, 574 TokenType.CURRENT_USER, 575 TokenType.FILTER, 576 TokenType.FIRST, 577 TokenType.FORMAT, 578 TokenType.GLOB, 579 TokenType.IDENTIFIER, 580 TokenType.INDEX, 581 TokenType.ISNULL, 582 TokenType.ILIKE, 583 TokenType.INSERT, 584 TokenType.LIKE, 585 TokenType.MERGE, 586 TokenType.OFFSET, 587 TokenType.PRIMARY_KEY, 588 TokenType.RANGE, 589 TokenType.REPLACE, 590 TokenType.RLIKE, 591 TokenType.ROW, 592 TokenType.UNNEST, 593 TokenType.VAR, 594 TokenType.LEFT, 595 TokenType.RIGHT, 596 TokenType.SEQUENCE, 597 TokenType.DATE, 598 TokenType.DATETIME, 599 TokenType.TABLE, 600 TokenType.TIMESTAMP, 601 TokenType.TIMESTAMPTZ, 602 TokenType.TRUNCATE, 603 TokenType.WINDOW, 604 TokenType.XOR, 605 *TYPE_TOKENS, 606 *SUBQUERY_PREDICATES, 607 } 608 609 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 610 TokenType.AND: exp.And, 611 } 612 613 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 614 TokenType.COLON_EQ: exp.PropertyEQ, 615 } 616 617 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 618 TokenType.OR: exp.Or, 619 } 620 621 EQUALITY = { 622 TokenType.EQ: exp.EQ, 623 TokenType.NEQ: exp.NEQ, 624 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 625 } 626 627 COMPARISON = { 628 TokenType.GT: exp.GT, 629 TokenType.GTE: exp.GTE, 630 TokenType.LT: exp.LT, 631 TokenType.LTE: exp.LTE, 632 } 633 634 BITWISE = { 635 TokenType.AMP: exp.BitwiseAnd, 636 TokenType.CARET: exp.BitwiseXor, 637 TokenType.PIPE: exp.BitwiseOr, 638 } 639 640 TERM = { 641 TokenType.DASH: exp.Sub, 642 TokenType.PLUS: exp.Add, 643 TokenType.MOD: exp.Mod, 644 TokenType.COLLATE: exp.Collate, 645 } 646 647 FACTOR = { 648 TokenType.DIV: exp.IntDiv, 649 TokenType.LR_ARROW: exp.Distance, 650 TokenType.SLASH: exp.Div, 651 TokenType.STAR: exp.Mul, 652 } 653 654 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 655 656 TIMES = { 657 TokenType.TIME, 658 TokenType.TIMETZ, 659 } 660 661 TIMESTAMPS = { 662 TokenType.TIMESTAMP, 663 TokenType.TIMESTAMPTZ, 664 TokenType.TIMESTAMPLTZ, 665 *TIMES, 666 } 667 668 SET_OPERATIONS = { 669 TokenType.UNION, 670 TokenType.INTERSECT, 671 TokenType.EXCEPT, 672 } 673 674 JOIN_METHODS = { 675 TokenType.ASOF, 676 TokenType.NATURAL, 677 TokenType.POSITIONAL, 678 } 679 680 JOIN_SIDES = { 681 TokenType.LEFT, 682 TokenType.RIGHT, 683 TokenType.FULL, 684 } 685 686 JOIN_KINDS = { 687 TokenType.ANTI, 688 TokenType.CROSS, 689 TokenType.INNER, 690 TokenType.OUTER, 691 TokenType.SEMI, 692 TokenType.STRAIGHT_JOIN, 693 } 694 695 JOIN_HINTS: t.Set[str] = set() 696 697 LAMBDAS = { 698 TokenType.ARROW: lambda self, expressions: self.expression( 699 exp.Lambda, 700 this=self._replace_lambda( 701 self._parse_assignment(), 702 expressions, 703 ), 704 expressions=expressions, 705 ), 706 TokenType.FARROW: lambda self, expressions: self.expression( 707 exp.Kwarg, 708 this=exp.var(expressions[0].name), 709 expression=self._parse_assignment(), 710 ), 711 } 712 713 COLUMN_OPERATORS = { 714 TokenType.DOT: None, 715 TokenType.DCOLON: lambda self, this, to: self.expression( 716 exp.Cast if self.STRICT_CAST 
else exp.TryCast, 717 this=this, 718 to=to, 719 ), 720 TokenType.ARROW: lambda self, this, path: self.expression( 721 exp.JSONExtract, 722 this=this, 723 expression=self.dialect.to_json_path(path), 724 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 725 ), 726 TokenType.DARROW: lambda self, this, path: self.expression( 727 exp.JSONExtractScalar, 728 this=this, 729 expression=self.dialect.to_json_path(path), 730 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 731 ), 732 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 733 exp.JSONBExtract, 734 this=this, 735 expression=path, 736 ), 737 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 738 exp.JSONBExtractScalar, 739 this=this, 740 expression=path, 741 ), 742 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 743 exp.JSONBContains, 744 this=this, 745 expression=key, 746 ), 747 } 748 749 EXPRESSION_PARSERS = { 750 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 751 exp.Column: lambda self: self._parse_column(), 752 exp.Condition: lambda self: self._parse_assignment(), 753 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 754 exp.Expression: lambda self: self._parse_expression(), 755 exp.From: lambda self: self._parse_from(joins=True), 756 exp.Group: lambda self: self._parse_group(), 757 exp.Having: lambda self: self._parse_having(), 758 exp.Identifier: lambda self: self._parse_id_var(), 759 exp.Join: lambda self: self._parse_join(), 760 exp.Lambda: lambda self: self._parse_lambda(), 761 exp.Lateral: lambda self: self._parse_lateral(), 762 exp.Limit: lambda self: self._parse_limit(), 763 exp.Offset: lambda self: self._parse_offset(), 764 exp.Order: lambda self: self._parse_order(), 765 exp.Ordered: lambda self: self._parse_ordered(), 766 exp.Properties: lambda self: self._parse_properties(), 767 exp.Qualify: lambda self: self._parse_qualify(), 768 exp.Returning: lambda self: self._parse_returning(), 769 exp.Select: lambda self: self._parse_select(), 770 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 771 exp.Table: lambda self: self._parse_table_parts(), 772 exp.TableAlias: lambda self: self._parse_table_alias(), 773 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 774 exp.Where: lambda self: self._parse_where(), 775 exp.Window: lambda self: self._parse_named_window(), 776 exp.With: lambda self: self._parse_with(), 777 "JOIN_TYPE": lambda self: self._parse_join_parts(), 778 } 779 780 STATEMENT_PARSERS = { 781 TokenType.ALTER: lambda self: self._parse_alter(), 782 TokenType.BEGIN: lambda self: self._parse_transaction(), 783 TokenType.CACHE: lambda self: self._parse_cache(), 784 TokenType.COMMENT: lambda self: self._parse_comment(), 785 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 786 TokenType.COPY: lambda self: self._parse_copy(), 787 TokenType.CREATE: lambda self: self._parse_create(), 788 TokenType.DELETE: lambda self: self._parse_delete(), 789 TokenType.DESC: lambda self: self._parse_describe(), 790 TokenType.DESCRIBE: lambda self: self._parse_describe(), 791 TokenType.DROP: lambda self: self._parse_drop(), 792 TokenType.GRANT: lambda self: self._parse_grant(), 793 TokenType.INSERT: lambda self: self._parse_insert(), 794 TokenType.KILL: lambda self: self._parse_kill(), 795 TokenType.LOAD: lambda self: self._parse_load(), 796 TokenType.MERGE: lambda self: self._parse_merge(), 797 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 798 TokenType.PRAGMA: lambda 
self: self.expression(exp.Pragma, this=self._parse_expression()), 799 TokenType.REFRESH: lambda self: self._parse_refresh(), 800 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 801 TokenType.SET: lambda self: self._parse_set(), 802 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 803 TokenType.UNCACHE: lambda self: self._parse_uncache(), 804 TokenType.UPDATE: lambda self: self._parse_update(), 805 TokenType.USE: lambda self: self.expression( 806 exp.Use, 807 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 808 this=self._parse_table(schema=False), 809 ), 810 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 811 } 812 813 UNARY_PARSERS = { 814 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 815 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 816 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 817 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 818 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 819 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 820 } 821 822 STRING_PARSERS = { 823 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 824 exp.RawString, this=token.text 825 ), 826 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 827 exp.National, this=token.text 828 ), 829 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 830 TokenType.STRING: lambda self, token: self.expression( 831 exp.Literal, this=token.text, is_string=True 832 ), 833 TokenType.UNICODE_STRING: lambda self, token: self.expression( 834 exp.UnicodeString, 835 this=token.text, 836 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 837 ), 838 } 839 840 NUMERIC_PARSERS = { 841 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 842 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 843 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 844 TokenType.NUMBER: lambda self, token: self.expression( 845 exp.Literal, this=token.text, is_string=False 846 ), 847 } 848 849 PRIMARY_PARSERS = { 850 **STRING_PARSERS, 851 **NUMERIC_PARSERS, 852 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 853 TokenType.NULL: lambda self, _: self.expression(exp.Null), 854 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 855 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 856 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 857 TokenType.STAR: lambda self, _: self._parse_star_ops(), 858 } 859 860 PLACEHOLDER_PARSERS = { 861 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 862 TokenType.PARAMETER: lambda self: self._parse_parameter(), 863 TokenType.COLON: lambda self: ( 864 self.expression(exp.Placeholder, this=self._prev.text) 865 if self._match_set(self.ID_VAR_TOKENS) 866 else None 867 ), 868 } 869 870 RANGE_PARSERS = { 871 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 872 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 873 TokenType.GLOB: binary_range_parser(exp.Glob), 874 TokenType.ILIKE: binary_range_parser(exp.ILike), 875 TokenType.IN: lambda self, this: self._parse_in(this), 876 TokenType.IRLIKE: 
binary_range_parser(exp.RegexpILike), 877 TokenType.IS: lambda self, this: self._parse_is(this), 878 TokenType.LIKE: binary_range_parser(exp.Like), 879 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 880 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 881 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 882 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 883 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 884 } 885 886 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 887 "ALLOWED_VALUES": lambda self: self.expression( 888 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 889 ), 890 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 891 "AUTO": lambda self: self._parse_auto_property(), 892 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 893 "BACKUP": lambda self: self.expression( 894 exp.BackupProperty, this=self._parse_var(any_token=True) 895 ), 896 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 897 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 898 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 899 "CHECKSUM": lambda self: self._parse_checksum(), 900 "CLUSTER BY": lambda self: self._parse_cluster(), 901 "CLUSTERED": lambda self: self._parse_clustered_by(), 902 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 903 exp.CollateProperty, **kwargs 904 ), 905 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 906 "CONTAINS": lambda self: self._parse_contains_property(), 907 "COPY": lambda self: self._parse_copy_property(), 908 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 909 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 910 "DEFINER": lambda self: self._parse_definer(), 911 "DETERMINISTIC": lambda self: self.expression( 912 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 913 ), 914 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 915 "DUPLICATE": lambda self: self._parse_duplicate(), 916 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 917 "DISTKEY": lambda self: self._parse_distkey(), 918 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 919 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 920 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 921 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 922 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 923 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 924 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 925 "FREESPACE": lambda self: self._parse_freespace(), 926 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 927 "HEAP": lambda self: self.expression(exp.HeapProperty), 928 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 929 "IMMUTABLE": lambda self: self.expression( 930 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 931 ), 932 "INHERITS": lambda self: self.expression( 933 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 934 ), 935 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 936 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 937 "LANGUAGE": lambda self: 
self._parse_property_assignment(exp.LanguageProperty), 938 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 939 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 940 "LIKE": lambda self: self._parse_create_like(), 941 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 942 "LOCK": lambda self: self._parse_locking(), 943 "LOCKING": lambda self: self._parse_locking(), 944 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 945 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 946 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 947 "MODIFIES": lambda self: self._parse_modifies_property(), 948 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 949 "NO": lambda self: self._parse_no_property(), 950 "ON": lambda self: self._parse_on_property(), 951 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 952 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 953 "PARTITION": lambda self: self._parse_partitioned_of(), 954 "PARTITION BY": lambda self: self._parse_partitioned_by(), 955 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 956 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 957 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 958 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 959 "READS": lambda self: self._parse_reads_property(), 960 "REMOTE": lambda self: self._parse_remote_with_connection(), 961 "RETURNS": lambda self: self._parse_returns(), 962 "STRICT": lambda self: self.expression(exp.StrictProperty), 963 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 964 "ROW": lambda self: self._parse_row(), 965 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 966 "SAMPLE": lambda self: self.expression( 967 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 968 ), 969 "SECURE": lambda self: self.expression(exp.SecureProperty), 970 "SECURITY": lambda self: self._parse_security(), 971 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 972 "SETTINGS": lambda self: self._parse_settings_property(), 973 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 974 "SORTKEY": lambda self: self._parse_sortkey(), 975 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 976 "STABLE": lambda self: self.expression( 977 exp.StabilityProperty, this=exp.Literal.string("STABLE") 978 ), 979 "STORED": lambda self: self._parse_stored(), 980 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 981 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 982 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 983 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 984 "TO": lambda self: self._parse_to_table(), 985 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 986 "TRANSFORM": lambda self: self.expression( 987 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 988 ), 989 "TTL": lambda self: self._parse_ttl(), 990 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 991 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 992 "VOLATILE": lambda self: self._parse_volatile_property(), 993 "WITH": lambda self: self._parse_with_property(), 994 } 995 996 CONSTRAINT_PARSERS = { 997 "AUTOINCREMENT": lambda 
self: self._parse_auto_increment(), 998 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 999 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1000 "CHARACTER SET": lambda self: self.expression( 1001 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1002 ), 1003 "CHECK": lambda self: self.expression( 1004 exp.CheckColumnConstraint, 1005 this=self._parse_wrapped(self._parse_assignment), 1006 enforced=self._match_text_seq("ENFORCED"), 1007 ), 1008 "COLLATE": lambda self: self.expression( 1009 exp.CollateColumnConstraint, 1010 this=self._parse_identifier() or self._parse_column(), 1011 ), 1012 "COMMENT": lambda self: self.expression( 1013 exp.CommentColumnConstraint, this=self._parse_string() 1014 ), 1015 "COMPRESS": lambda self: self._parse_compress(), 1016 "CLUSTERED": lambda self: self.expression( 1017 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1018 ), 1019 "NONCLUSTERED": lambda self: self.expression( 1020 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1021 ), 1022 "DEFAULT": lambda self: self.expression( 1023 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1024 ), 1025 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1026 "EPHEMERAL": lambda self: self.expression( 1027 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1028 ), 1029 "EXCLUDE": lambda self: self.expression( 1030 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1031 ), 1032 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1033 "FORMAT": lambda self: self.expression( 1034 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1035 ), 1036 "GENERATED": lambda self: self._parse_generated_as_identity(), 1037 "IDENTITY": lambda self: self._parse_auto_increment(), 1038 "INLINE": lambda self: self._parse_inline(), 1039 "LIKE": lambda self: self._parse_create_like(), 1040 "NOT": lambda self: self._parse_not_constraint(), 1041 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1042 "ON": lambda self: ( 1043 self._match(TokenType.UPDATE) 1044 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1045 ) 1046 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1047 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1048 "PERIOD": lambda self: self._parse_period_for_system_time(), 1049 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1050 "REFERENCES": lambda self: self._parse_references(match=False), 1051 "TITLE": lambda self: self.expression( 1052 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1053 ), 1054 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1055 "UNIQUE": lambda self: self._parse_unique(), 1056 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1057 "WITH": lambda self: self.expression( 1058 exp.Properties, expressions=self._parse_wrapped_properties() 1059 ), 1060 } 1061 1062 ALTER_PARSERS = { 1063 "ADD": lambda self: self._parse_alter_table_add(), 1064 "AS": lambda self: self._parse_select(), 1065 "ALTER": lambda self: self._parse_alter_table_alter(), 1066 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1067 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1068 "DROP": lambda self: self._parse_alter_table_drop(), 1069 "RENAME": lambda self: 

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
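
    # Illustrative note (not part of the original source): FUNCTION_PARSERS covers
    # call-like constructs whose argument lists aren't plain CSV (e.g. the AS in
    # CAST, or the IN in POSITION), so each needs a dedicated parsing routine:
    #
    #     import sqlglot
    #     sqlglot.parse_one("CAST(a AS INT)")      # handled by _parse_cast
    #     sqlglot.parse_one("POSITION('x' IN y)")  # handled by _parse_position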

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}
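
    # Illustrative note (not part of the original source): OPTIONS_TYPE tables map
    # a leading keyword to the word sequences that may follow it and are consumed
    # by _parse_var_from_options. For TRANSACTION_CHARACTERISTICS, the input
    # "ISOLATION LEVEL READ COMMITTED" matches the ("LEVEL", "READ", "COMMITTED") entry.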

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
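
    # Illustrative usage (not part of the original source): the Parser is normally
    # driven through sqlglot.parse/parse_one, but it can also be fed a token
    # stream directly:
    #
    #     from sqlglot.tokens import Tokenizer
    #     sql = "SELECT a FROM t; SELECT b FROM u"
    #     tokens = Tokenizer().tokenize(sql)
    #     trees = Parser().parse(tokens, sql=sql)  # one syntax tree per statement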

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
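
    # Illustrative usage (not part of the original source), assuming exp.Select is
    # registered in EXPRESSION_PARSERS; the public wrapper for this method is
    # parse_one(sql, into=...):
    #
    #     import sqlglot
    #     select = sqlglot.parse_one("SELECT 1", into=exp.Select)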

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
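
    # Illustrative sketch (not part of the original source): expression() funnels
    # every node through validate_expression, so missing mandatory arguments
    # surface according to error_level:
    #
    #     Parser(error_level=ErrorLevel.IGNORE).expression(exp.Not)     # no error
    #     Parser(error_level=ErrorLevel.IMMEDIATE).expression(exp.Not)  # ParseError: missing "this"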

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
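
    # Illustrative note (not part of the original source): because _parse_statement
    # falls back to expression parsing when no statement keyword matches, a bare
    # expression parses without a surrounding SELECT:
    #
    #     import sqlglot
    #     condition = sqlglot.parse_one("x > y")  # exp.GT, via the fallback path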

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
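
    # Illustrative note (not part of the original source): this is the kind of DDL
    # the sequence-property loop above targets, with options accumulated from
    # CREATE_SEQUENCE when no dedicated branch matches:
    #
    #     CREATE SEQUENCE s INCREMENT BY 2 START WITH 10 CACHE 5 NOCYCLE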

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()
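
    # Illustrative note (not part of the original source): _parse_with_property and
    # _parse_system_versioning_property target T-SQL clauses of this shape:
    #
    #     CREATE TABLE t (...) WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.t_history))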

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []
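
    # Illustrative note (not part of the original source): _parse_locking targets
    # Teradata locking modifiers of this shape:
    #
    #     LOCKING ROW FOR ACCESS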

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
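
    # Illustrative note (not part of the original source): _parse_partitioned_of and
    # _parse_partition_bound_spec handle Postgres declarative partitions such as:
    #
    #     CREATE TABLE m1 PARTITION OF m FOR VALUES FROM (MINVALUE) TO (10)
    #     CREATE TABLE m2 PARTITION OF m FOR VALUES WITH (MODULUS 4, REMAINDER 0)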

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
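
    # Illustrative note (not part of the original source): _parse_multitable_inserts
    # targets Oracle-style conditional multitable inserts, i.e. SQL of this shape:
    #
    #     INSERT ALL
    #         WHEN amount > 100 THEN INTO big_orders
    #         ELSE INTO small_orders
    #     SELECT * FROM orders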

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
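
    # Illustrative usage (not part of the original source): both the Postgres/SQLite
    # and the MySQL conflict clauses are funnelled through _parse_on_conflict:
    #
    #     import sqlglot
    #     sqlglot.parse_one("INSERT INTO t (id) VALUES (1) ON CONFLICT (id) DO NOTHING")
    #     sqlglot.parse_one("INSERT INTO t (id) VALUES (1) ON DUPLICATE KEY UPDATE id = 1", read="mysql")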
expressions=self._parse_csv(self._parse_expression), 2768 into=self._match(TokenType.INTO) and self._parse_table_part(), 2769 ) 2770 2771 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2772 if not self._match(TokenType.FORMAT): 2773 return None 2774 return self._parse_row_format() 2775 2776 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2777 index = self._index 2778 with_ = with_ or self._match_text_seq("WITH") 2779 2780 if not self._match(TokenType.SERDE_PROPERTIES): 2781 self._retreat(index) 2782 return None 2783 return self.expression( 2784 exp.SerdeProperties, 2785 **{ # type: ignore 2786 "expressions": self._parse_wrapped_properties(), 2787 "with": with_, 2788 }, 2789 ) 2790 2791 def _parse_row_format( 2792 self, match_row: bool = False 2793 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2794 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2795 return None 2796 2797 if self._match_text_seq("SERDE"): 2798 this = self._parse_string() 2799 2800 serde_properties = self._parse_serde_properties() 2801 2802 return self.expression( 2803 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2804 ) 2805 2806 self._match_text_seq("DELIMITED") 2807 2808 kwargs = {} 2809 2810 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2811 kwargs["fields"] = self._parse_string() 2812 if self._match_text_seq("ESCAPED", "BY"): 2813 kwargs["escaped"] = self._parse_string() 2814 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2815 kwargs["collection_items"] = self._parse_string() 2816 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2817 kwargs["map_keys"] = self._parse_string() 2818 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2819 kwargs["lines"] = self._parse_string() 2820 if self._match_text_seq("NULL", "DEFINED", "AS"): 2821 kwargs["null"] = self._parse_string() 2822 2823 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2824 2825 def _parse_load(self) -> exp.LoadData | exp.Command: 2826 if self._match_text_seq("DATA"): 2827 local = self._match_text_seq("LOCAL") 2828 self._match_text_seq("INPATH") 2829 inpath = self._parse_string() 2830 overwrite = self._match(TokenType.OVERWRITE) 2831 self._match_pair(TokenType.INTO, TokenType.TABLE) 2832 2833 return self.expression( 2834 exp.LoadData, 2835 this=self._parse_table(schema=True), 2836 local=local, 2837 overwrite=overwrite, 2838 inpath=inpath, 2839 partition=self._parse_partition(), 2840 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2841 serde=self._match_text_seq("SERDE") and self._parse_string(), 2842 ) 2843 return self._parse_as_command(self._prev) 2844 2845 def _parse_delete(self) -> exp.Delete: 2846 # This handles MySQL's "Multiple-Table Syntax" 2847 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2848 tables = None 2849 comments = self._prev_comments 2850 if not self._match(TokenType.FROM, advance=False): 2851 tables = self._parse_csv(self._parse_table) or None 2852 2853 returning = self._parse_returning() 2854 2855 return self.expression( 2856 exp.Delete, 2857 comments=comments, 2858 tables=tables, 2859 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2860 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2861 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2862 where=self._parse_where(), 2863 returning=returning or 
self._parse_returning(), 2864 limit=self._parse_limit(), 2865 ) 2866 2867 def _parse_update(self) -> exp.Update: 2868 comments = self._prev_comments 2869 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2870 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2871 returning = self._parse_returning() 2872 return self.expression( 2873 exp.Update, 2874 comments=comments, 2875 **{ # type: ignore 2876 "this": this, 2877 "expressions": expressions, 2878 "from": self._parse_from(joins=True), 2879 "where": self._parse_where(), 2880 "returning": returning or self._parse_returning(), 2881 "order": self._parse_order(), 2882 "limit": self._parse_limit(), 2883 }, 2884 ) 2885 2886 def _parse_uncache(self) -> exp.Uncache: 2887 if not self._match(TokenType.TABLE): 2888 self.raise_error("Expecting TABLE after UNCACHE") 2889 2890 return self.expression( 2891 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2892 ) 2893 2894 def _parse_cache(self) -> exp.Cache: 2895 lazy = self._match_text_seq("LAZY") 2896 self._match(TokenType.TABLE) 2897 table = self._parse_table(schema=True) 2898 2899 options = [] 2900 if self._match_text_seq("OPTIONS"): 2901 self._match_l_paren() 2902 k = self._parse_string() 2903 self._match(TokenType.EQ) 2904 v = self._parse_string() 2905 options = [k, v] 2906 self._match_r_paren() 2907 2908 self._match(TokenType.ALIAS) 2909 return self.expression( 2910 exp.Cache, 2911 this=table, 2912 lazy=lazy, 2913 options=options, 2914 expression=self._parse_select(nested=True), 2915 ) 2916 2917 def _parse_partition(self) -> t.Optional[exp.Partition]: 2918 if not self._match(TokenType.PARTITION): 2919 return None 2920 2921 return self.expression( 2922 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2923 ) 2924 2925 def _parse_value(self) -> t.Optional[exp.Tuple]: 2926 if self._match(TokenType.L_PAREN): 2927 expressions = self._parse_csv(self._parse_expression) 2928 self._match_r_paren() 2929 return self.expression(exp.Tuple, expressions=expressions) 2930 2931 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
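# For example, "VALUES 1, 2" yields two single-element tuples (one column,
# two rows), whereas "VALUES (1, 2)" is a single two-column row (illustrative)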
2932 expression = self._parse_expression() 2933 if expression: 2934 return self.expression(exp.Tuple, expressions=[expression]) 2935 return None 2936 2937 def _parse_projections(self) -> t.List[exp.Expression]: 2938 return self._parse_expressions() 2939 2940 def _parse_select( 2941 self, 2942 nested: bool = False, 2943 table: bool = False, 2944 parse_subquery_alias: bool = True, 2945 parse_set_operation: bool = True, 2946 ) -> t.Optional[exp.Expression]: 2947 cte = self._parse_with() 2948 2949 if cte: 2950 this = self._parse_statement() 2951 2952 if not this: 2953 self.raise_error("Failed to parse any statement following CTE") 2954 return cte 2955 2956 if "with" in this.arg_types: 2957 this.set("with", cte) 2958 else: 2959 self.raise_error(f"{this.key} does not support CTE") 2960 this = cte 2961 2962 return this 2963 2964 # duckdb supports leading with FROM x 2965 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2966 2967 if self._match(TokenType.SELECT): 2968 comments = self._prev_comments 2969 2970 hint = self._parse_hint() 2971 2972 if self._next and not self._next.token_type == TokenType.DOT: 2973 all_ = self._match(TokenType.ALL) 2974 distinct = self._match_set(self.DISTINCT_TOKENS) 2975 else: 2976 all_, distinct = None, None 2977 2978 kind = ( 2979 self._match(TokenType.ALIAS) 2980 and self._match_texts(("STRUCT", "VALUE")) 2981 and self._prev.text.upper() 2982 ) 2983 2984 if distinct: 2985 distinct = self.expression( 2986 exp.Distinct, 2987 on=self._parse_value() if self._match(TokenType.ON) else None, 2988 ) 2989 2990 if all_ and distinct: 2991 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2992 2993 operation_modifiers = [] 2994 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2995 operation_modifiers.append(exp.var(self._prev.text.upper())) 2996 2997 limit = self._parse_limit(top=True) 2998 projections = self._parse_projections() 2999 3000 this = self.expression( 3001 exp.Select, 3002 kind=kind, 3003 hint=hint, 3004 distinct=distinct, 3005 expressions=projections, 3006 limit=limit, 3007 operation_modifiers=operation_modifiers or None, 3008 ) 3009 this.comments = comments 3010 3011 into = self._parse_into() 3012 if into: 3013 this.set("into", into) 3014 3015 if not from_: 3016 from_ = self._parse_from() 3017 3018 if from_: 3019 this.set("from", from_) 3020 3021 this = self._parse_query_modifiers(this) 3022 elif (table or nested) and self._match(TokenType.L_PAREN): 3023 if self._match(TokenType.PIVOT): 3024 this = self._parse_simplified_pivot() 3025 elif self._match(TokenType.FROM): 3026 this = exp.select("*").from_( 3027 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3028 ) 3029 else: 3030 this = ( 3031 self._parse_table() 3032 if table 3033 else self._parse_select(nested=True, parse_set_operation=False) 3034 ) 3035 3036 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3037 # in case a modifier (e.g. 
join) is following 3038 if table and isinstance(this, exp.Values) and this.alias: 3039 alias = this.args["alias"].pop() 3040 this = exp.Table(this=this, alias=alias) 3041 3042 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3043 3044 self._match_r_paren() 3045 3046 # We return early here so that the UNION isn't attached to the subquery by the 3047 # following call to _parse_set_operations, but instead becomes the parent node 3048 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3049 elif self._match(TokenType.VALUES, advance=False): 3050 this = self._parse_derived_table_values() 3051 elif from_: 3052 this = exp.select("*").from_(from_.this, copy=False) 3053 elif self._match(TokenType.SUMMARIZE): 3054 table = self._match(TokenType.TABLE) 3055 this = self._parse_select() or self._parse_string() or self._parse_table() 3056 return self.expression(exp.Summarize, this=this, table=table) 3057 elif self._match(TokenType.DESCRIBE): 3058 this = self._parse_describe() 3059 elif self._match_text_seq("STREAM"): 3060 this = self._parse_function() 3061 if this: 3062 this = self.expression(exp.Stream, this=this) 3063 else: 3064 self._retreat(self._index - 1) 3065 else: 3066 this = None 3067 3068 return self._parse_set_operations(this) if parse_set_operation else this 3069 3070 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3071 if not skip_with_token and not self._match(TokenType.WITH): 3072 return None 3073 3074 comments = self._prev_comments 3075 recursive = self._match(TokenType.RECURSIVE) 3076 3077 last_comments = None 3078 expressions = [] 3079 while True: 3080 expressions.append(self._parse_cte()) 3081 if last_comments: 3082 expressions[-1].add_comments(last_comments) 3083 3084 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3085 break 3086 else: 3087 self._match(TokenType.WITH) 3088 3089 last_comments = self._prev_comments 3090 3091 return self.expression( 3092 exp.With, comments=comments, expressions=expressions, recursive=recursive 3093 ) 3094 3095 def _parse_cte(self) -> exp.CTE: 3096 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3097 if not alias or not alias.this: 3098 self.raise_error("Expected CTE to have alias") 3099 3100 self._match(TokenType.ALIAS) 3101 comments = self._prev_comments 3102 3103 if self._match_text_seq("NOT", "MATERIALIZED"): 3104 materialized = False 3105 elif self._match_text_seq("MATERIALIZED"): 3106 materialized = True 3107 else: 3108 materialized = None 3109 3110 return self.expression( 3111 exp.CTE, 3112 this=self._parse_wrapped(self._parse_statement), 3113 alias=alias, 3114 materialized=materialized, 3115 comments=comments, 3116 ) 3117 3118 def _parse_table_alias( 3119 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3120 ) -> t.Optional[exp.TableAlias]: 3121 any_token = self._match(TokenType.ALIAS) 3122 alias = ( 3123 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3124 or self._parse_string_as_identifier() 3125 ) 3126 3127 index = self._index 3128 if self._match(TokenType.L_PAREN): 3129 columns = self._parse_csv(self._parse_function_parameter) 3130 self._match_r_paren() if columns else self._retreat(index) 3131 else: 3132 columns = None 3133 3134 if not alias and not columns: 3135 return None 3136 3137 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3138 3139 # We bubble up comments from the Identifier to the TableAlias 3140 if isinstance(alias, exp.Identifier): 3141 
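# e.g. for "tbl AS a /* note */", a comment attached to the identifier "a"
# ends up on the TableAlias node instead (illustrative)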
table_alias.add_comments(alias.pop_comments()) 3142 3143 return table_alias 3144 3145 def _parse_subquery( 3146 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3147 ) -> t.Optional[exp.Subquery]: 3148 if not this: 3149 return None 3150 3151 return self.expression( 3152 exp.Subquery, 3153 this=this, 3154 pivots=self._parse_pivots(), 3155 alias=self._parse_table_alias() if parse_alias else None, 3156 sample=self._parse_table_sample(), 3157 ) 3158 3159 def _implicit_unnests_to_explicit(self, this: E) -> E: 3160 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3161 3162 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3163 for i, join in enumerate(this.args.get("joins") or []): 3164 table = join.this 3165 normalized_table = table.copy() 3166 normalized_table.meta["maybe_column"] = True 3167 normalized_table = _norm(normalized_table, dialect=self.dialect) 3168 3169 if isinstance(table, exp.Table) and not join.args.get("on"): 3170 if normalized_table.parts[0].name in refs: 3171 table_as_column = table.to_column() 3172 unnest = exp.Unnest(expressions=[table_as_column]) 3173 3174 # Table.to_column creates a parent Alias node that we want to convert to 3175 # a TableAlias and attach to the Unnest, so it matches the parser's output 3176 if isinstance(table.args.get("alias"), exp.TableAlias): 3177 table_as_column.replace(table_as_column.this) 3178 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3179 3180 table.replace(unnest) 3181 3182 refs.add(normalized_table.alias_or_name) 3183 3184 return this 3185 3186 def _parse_query_modifiers( 3187 self, this: t.Optional[exp.Expression] 3188 ) -> t.Optional[exp.Expression]: 3189 if isinstance(this, (exp.Query, exp.Table)): 3190 for join in self._parse_joins(): 3191 this.append("joins", join) 3192 for lateral in iter(self._parse_lateral, None): 3193 this.append("laterals", lateral) 3194 3195 while True: 3196 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3197 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3198 key, expression = parser(self) 3199 3200 if expression: 3201 this.set(key, expression) 3202 if key == "limit": 3203 offset = expression.args.pop("offset", None) 3204 3205 if offset: 3206 offset = exp.Offset(expression=offset) 3207 this.set("offset", offset) 3208 3209 limit_by_expressions = expression.expressions 3210 expression.set("expressions", None) 3211 offset.set("expressions", limit_by_expressions) 3212 continue 3213 break 3214 3215 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3216 this = self._implicit_unnests_to_explicit(this) 3217 3218 return this 3219 3220 def _parse_hint(self) -> t.Optional[exp.Hint]: 3221 if self._match(TokenType.HINT): 3222 hints = [] 3223 for hint in iter( 3224 lambda: self._parse_csv( 3225 lambda: self._parse_function() or self._parse_var(upper=True) 3226 ), 3227 [], 3228 ): 3229 hints.extend(hint) 3230 3231 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3232 self.raise_error("Expected */ after HINT") 3233 3234 return self.expression(exp.Hint, expressions=hints) 3235 3236 return None 3237 3238 def _parse_into(self) -> t.Optional[exp.Into]: 3239 if not self._match(TokenType.INTO): 3240 return None 3241 3242 temp = self._match(TokenType.TEMPORARY) 3243 unlogged = self._match_text_seq("UNLOGGED") 3244 self._match(TokenType.TABLE) 3245 3246 return self.expression( 3247 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3248 ) 
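# Example (illustrative): both T-SQL's "SELECT * INTO #tmp FROM t" and
# Postgres' "SELECT * INTO TEMPORARY t2 FROM t1" are handled by _parse_into
# above; the resulting exp.Into is attached to the Select by _parse_select:
#
#     import sqlglot
#     sqlglot.parse_one("SELECT * INTO TEMPORARY t2 FROM t1", read="postgres")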
3249 3250 def _parse_from( 3251 self, joins: bool = False, skip_from_token: bool = False 3252 ) -> t.Optional[exp.From]: 3253 if not skip_from_token and not self._match(TokenType.FROM): 3254 return None 3255 3256 return self.expression( 3257 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3258 ) 3259 3260 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3261 return self.expression( 3262 exp.MatchRecognizeMeasure, 3263 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3264 this=self._parse_expression(), 3265 ) 3266 3267 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3268 if not self._match(TokenType.MATCH_RECOGNIZE): 3269 return None 3270 3271 self._match_l_paren() 3272 3273 partition = self._parse_partition_by() 3274 order = self._parse_order() 3275 3276 measures = ( 3277 self._parse_csv(self._parse_match_recognize_measure) 3278 if self._match_text_seq("MEASURES") 3279 else None 3280 ) 3281 3282 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3283 rows = exp.var("ONE ROW PER MATCH") 3284 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3285 text = "ALL ROWS PER MATCH" 3286 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3287 text += " SHOW EMPTY MATCHES" 3288 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3289 text += " OMIT EMPTY MATCHES" 3290 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3291 text += " WITH UNMATCHED ROWS" 3292 rows = exp.var(text) 3293 else: 3294 rows = None 3295 3296 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3297 text = "AFTER MATCH SKIP" 3298 if self._match_text_seq("PAST", "LAST", "ROW"): 3299 text += " PAST LAST ROW" 3300 elif self._match_text_seq("TO", "NEXT", "ROW"): 3301 text += " TO NEXT ROW" 3302 elif self._match_text_seq("TO", "FIRST"): 3303 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3304 elif self._match_text_seq("TO", "LAST"): 3305 text += f" TO LAST {self._advance_any().text}" # type: ignore 3306 after = exp.var(text) 3307 else: 3308 after = None 3309 3310 if self._match_text_seq("PATTERN"): 3311 self._match_l_paren() 3312 3313 if not self._curr: 3314 self.raise_error("Expecting )", self._curr) 3315 3316 paren = 1 3317 start = self._curr 3318 3319 while self._curr and paren > 0: 3320 if self._curr.token_type == TokenType.L_PAREN: 3321 paren += 1 3322 if self._curr.token_type == TokenType.R_PAREN: 3323 paren -= 1 3324 3325 end = self._prev 3326 self._advance() 3327 3328 if paren > 0: 3329 self.raise_error("Expecting )", self._curr) 3330 3331 pattern = exp.var(self._find_sql(start, end)) 3332 else: 3333 pattern = None 3334 3335 define = ( 3336 self._parse_csv(self._parse_name_as_expression) 3337 if self._match_text_seq("DEFINE") 3338 else None 3339 ) 3340 3341 self._match_r_paren() 3342 3343 return self.expression( 3344 exp.MatchRecognize, 3345 partition_by=partition, 3346 order=order, 3347 measures=measures, 3348 rows=rows, 3349 after=after, 3350 pattern=pattern, 3351 define=define, 3352 alias=self._parse_table_alias(), 3353 ) 3354 3355 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3356 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3357 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3358 cross_apply = False 3359 3360 if cross_apply is not None: 3361 this = self._parse_select(table=True) 3362 view = None 3363 outer = None 3364 elif self._match(TokenType.LATERAL): 3365 this = self._parse_select(table=True) 3366 view = 
self._match(TokenType.VIEW) 3367 outer = self._match(TokenType.OUTER) 3368 else: 3369 return None 3370 3371 if not this: 3372 this = ( 3373 self._parse_unnest() 3374 or self._parse_function() 3375 or self._parse_id_var(any_token=False) 3376 ) 3377 3378 while self._match(TokenType.DOT): 3379 this = exp.Dot( 3380 this=this, 3381 expression=self._parse_function() or self._parse_id_var(any_token=False), 3382 ) 3383 3384 if view: 3385 table = self._parse_id_var(any_token=False) 3386 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3387 table_alias: t.Optional[exp.TableAlias] = self.expression( 3388 exp.TableAlias, this=table, columns=columns 3389 ) 3390 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3391 # We move the alias from the lateral's child node to the lateral itself 3392 table_alias = this.args["alias"].pop() 3393 else: 3394 table_alias = self._parse_table_alias() 3395 3396 return self.expression( 3397 exp.Lateral, 3398 this=this, 3399 view=view, 3400 outer=outer, 3401 alias=table_alias, 3402 cross_apply=cross_apply, 3403 ) 3404 3405 def _parse_join_parts( 3406 self, 3407 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3408 return ( 3409 self._match_set(self.JOIN_METHODS) and self._prev, 3410 self._match_set(self.JOIN_SIDES) and self._prev, 3411 self._match_set(self.JOIN_KINDS) and self._prev, 3412 ) 3413 3414 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3415 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3416 this = self._parse_column() 3417 if isinstance(this, exp.Column): 3418 return this.this 3419 return this 3420 3421 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3422 3423 def _parse_join( 3424 self, skip_join_token: bool = False, parse_bracket: bool = False 3425 ) -> t.Optional[exp.Join]: 3426 if self._match(TokenType.COMMA): 3427 return self.expression(exp.Join, this=self._parse_table()) 3428 3429 index = self._index 3430 method, side, kind = self._parse_join_parts() 3431 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3432 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3433 3434 if not skip_join_token and not join: 3435 self._retreat(index) 3436 kind = None 3437 method = None 3438 side = None 3439 3440 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3441 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3442 3443 if not skip_join_token and not join and not outer_apply and not cross_apply: 3444 return None 3445 3446 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3447 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3448 kwargs["expressions"] = self._parse_csv( 3449 lambda: self._parse_table(parse_bracket=parse_bracket) 3450 ) 3451 3452 if method: 3453 kwargs["method"] = method.text 3454 if side: 3455 kwargs["side"] = side.text 3456 if kind: 3457 kwargs["kind"] = kind.text 3458 if hint: 3459 kwargs["hint"] = hint 3460 3461 if self._match(TokenType.MATCH_CONDITION): 3462 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3463 3464 if self._match(TokenType.ON): 3465 kwargs["on"] = self._parse_assignment() 3466 elif self._match(TokenType.USING): 3467 kwargs["using"] = self._parse_using_identifiers() 3468 elif ( 3469 not (outer_apply or cross_apply) 3470 and not isinstance(kwargs["this"], exp.Unnest) 3471 and not (kind and kind.token_type in 
(TokenType.CROSS, TokenType.ARRAY)) 3472 ): 3473 index = self._index 3474 joins: t.Optional[list] = list(self._parse_joins()) 3475 3476 if joins and self._match(TokenType.ON): 3477 kwargs["on"] = self._parse_assignment() 3478 elif joins and self._match(TokenType.USING): 3479 kwargs["using"] = self._parse_using_identifiers() 3480 else: 3481 joins = None 3482 self._retreat(index) 3483 3484 kwargs["this"].set("joins", joins if joins else None) 3485 3486 comments = [c for token in (method, side, kind) if token for c in token.comments] 3487 return self.expression(exp.Join, comments=comments, **kwargs) 3488 3489 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3490 this = self._parse_assignment() 3491 3492 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3493 return this 3494 3495 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3496 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3497 3498 return this 3499 3500 def _parse_index_params(self) -> exp.IndexParameters: 3501 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3502 3503 if self._match(TokenType.L_PAREN, advance=False): 3504 columns = self._parse_wrapped_csv(self._parse_with_operator) 3505 else: 3506 columns = None 3507 3508 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3509 partition_by = self._parse_partition_by() 3510 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3511 tablespace = ( 3512 self._parse_var(any_token=True) 3513 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3514 else None 3515 ) 3516 where = self._parse_where() 3517 3518 on = self._parse_field() if self._match(TokenType.ON) else None 3519 3520 return self.expression( 3521 exp.IndexParameters, 3522 using=using, 3523 columns=columns, 3524 include=include, 3525 partition_by=partition_by, 3526 where=where, 3527 with_storage=with_storage, 3528 tablespace=tablespace, 3529 on=on, 3530 ) 3531 3532 def _parse_index( 3533 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3534 ) -> t.Optional[exp.Index]: 3535 if index or anonymous: 3536 unique = None 3537 primary = None 3538 amp = None 3539 3540 self._match(TokenType.ON) 3541 self._match(TokenType.TABLE) # hive 3542 table = self._parse_table_parts(schema=True) 3543 else: 3544 unique = self._match(TokenType.UNIQUE) 3545 primary = self._match_text_seq("PRIMARY") 3546 amp = self._match_text_seq("AMP") 3547 3548 if not self._match(TokenType.INDEX): 3549 return None 3550 3551 index = self._parse_id_var() 3552 table = None 3553 3554 params = self._parse_index_params() 3555 3556 return self.expression( 3557 exp.Index, 3558 this=index, 3559 table=table, 3560 unique=unique, 3561 primary=primary, 3562 amp=amp, 3563 params=params, 3564 ) 3565 3566 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3567 hints: t.List[exp.Expression] = [] 3568 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3569 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3570 hints.append( 3571 self.expression( 3572 exp.WithTableHint, 3573 expressions=self._parse_csv( 3574 lambda: self._parse_function() or self._parse_var(any_token=True) 3575 ), 3576 ) 3577 ) 3578 self._match_r_paren() 3579 else: 3580 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3581 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3582 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3583 3584 
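# e.g. MySQL's "SELECT * FROM t USE INDEX (i1, i2)" or
# "... FORCE INDEX FOR ORDER BY (i1)" (illustrative, per the link above)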
self._match_set((TokenType.INDEX, TokenType.KEY)) 3585 if self._match(TokenType.FOR): 3586 hint.set("target", self._advance_any() and self._prev.text.upper()) 3587 3588 hint.set("expressions", self._parse_wrapped_id_vars()) 3589 hints.append(hint) 3590 3591 return hints or None 3592 3593 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3594 return ( 3595 (not schema and self._parse_function(optional_parens=False)) 3596 or self._parse_id_var(any_token=False) 3597 or self._parse_string_as_identifier() 3598 or self._parse_placeholder() 3599 ) 3600 3601 def _parse_table_parts( 3602 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3603 ) -> exp.Table: 3604 catalog = None 3605 db = None 3606 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3607 3608 while self._match(TokenType.DOT): 3609 if catalog: 3610 # This allows nesting the table in arbitrarily many dot expressions if needed 3611 table = self.expression( 3612 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3613 ) 3614 else: 3615 catalog = db 3616 db = table 3617 # "" used for tsql FROM a..b case 3618 table = self._parse_table_part(schema=schema) or "" 3619 3620 if ( 3621 wildcard 3622 and self._is_connected() 3623 and (isinstance(table, exp.Identifier) or not table) 3624 and self._match(TokenType.STAR) 3625 ): 3626 if isinstance(table, exp.Identifier): 3627 table.args["this"] += "*" 3628 else: 3629 table = exp.Identifier(this="*") 3630 3631 # We bubble up comments from the Identifier to the Table 3632 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3633 3634 if is_db_reference: 3635 catalog = db 3636 db = table 3637 table = None 3638 3639 if not table and not is_db_reference: 3640 self.raise_error(f"Expected table name but got {self._curr}") 3641 if not db and is_db_reference: 3642 self.raise_error(f"Expected database name but got {self._curr}") 3643 3644 table = self.expression( 3645 exp.Table, 3646 comments=comments, 3647 this=table, 3648 db=db, 3649 catalog=catalog, 3650 ) 3651 3652 changes = self._parse_changes() 3653 if changes: 3654 table.set("changes", changes) 3655 3656 at_before = self._parse_historical_data() 3657 if at_before: 3658 table.set("when", at_before) 3659 3660 pivots = self._parse_pivots() 3661 if pivots: 3662 table.set("pivots", pivots) 3663 3664 return table 3665 3666 def _parse_table( 3667 self, 3668 schema: bool = False, 3669 joins: bool = False, 3670 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3671 parse_bracket: bool = False, 3672 is_db_reference: bool = False, 3673 parse_partition: bool = False, 3674 ) -> t.Optional[exp.Expression]: 3675 lateral = self._parse_lateral() 3676 if lateral: 3677 return lateral 3678 3679 unnest = self._parse_unnest() 3680 if unnest: 3681 return unnest 3682 3683 values = self._parse_derived_table_values() 3684 if values: 3685 return values 3686 3687 subquery = self._parse_select(table=True) 3688 if subquery: 3689 if not subquery.args.get("pivots"): 3690 subquery.set("pivots", self._parse_pivots()) 3691 return subquery 3692 3693 bracket = parse_bracket and self._parse_bracket(None) 3694 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3695 3696 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3697 self._parse_table 3698 ) 3699 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3700 3701 only = self._match(TokenType.ONLY) 3702 3703 this = 
t.cast( 3704 exp.Expression, 3705 bracket 3706 or rows_from 3707 or self._parse_bracket( 3708 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3709 ), 3710 ) 3711 3712 if only: 3713 this.set("only", only) 3714 3715 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3716 self._match_text_seq("*") 3717 3718 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3719 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3720 this.set("partition", self._parse_partition()) 3721 3722 if schema: 3723 return self._parse_schema(this=this) 3724 3725 version = self._parse_version() 3726 3727 if version: 3728 this.set("version", version) 3729 3730 if self.dialect.ALIAS_POST_TABLESAMPLE: 3731 this.set("sample", self._parse_table_sample()) 3732 3733 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3734 if alias: 3735 this.set("alias", alias) 3736 3737 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3738 return self.expression( 3739 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3740 ) 3741 3742 this.set("hints", self._parse_table_hints()) 3743 3744 if not this.args.get("pivots"): 3745 this.set("pivots", self._parse_pivots()) 3746 3747 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3748 this.set("sample", self._parse_table_sample()) 3749 3750 if joins: 3751 for join in self._parse_joins(): 3752 this.append("joins", join) 3753 3754 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3755 this.set("ordinality", True) 3756 this.set("alias", self._parse_table_alias()) 3757 3758 return this 3759 3760 def _parse_version(self) -> t.Optional[exp.Version]: 3761 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3762 this = "TIMESTAMP" 3763 elif self._match(TokenType.VERSION_SNAPSHOT): 3764 this = "VERSION" 3765 else: 3766 return None 3767 3768 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3769 kind = self._prev.text.upper() 3770 start = self._parse_bitwise() 3771 self._match_texts(("TO", "AND")) 3772 end = self._parse_bitwise() 3773 expression: t.Optional[exp.Expression] = self.expression( 3774 exp.Tuple, expressions=[start, end] 3775 ) 3776 elif self._match_text_seq("CONTAINED", "IN"): 3777 kind = "CONTAINED IN" 3778 expression = self.expression( 3779 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3780 ) 3781 elif self._match(TokenType.ALL): 3782 kind = "ALL" 3783 expression = None 3784 else: 3785 self._match_text_seq("AS", "OF") 3786 kind = "AS OF" 3787 expression = self._parse_type() 3788 3789 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3790 3791 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3792 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3793 index = self._index 3794 historical_data = None 3795 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3796 this = self._prev.text.upper() 3797 kind = ( 3798 self._match(TokenType.L_PAREN) 3799 and self._match_texts(self.HISTORICAL_DATA_KIND) 3800 and self._prev.text.upper() 3801 ) 3802 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3803 3804 if expression: 3805 self._match_r_paren() 3806 historical_data = self.expression( 3807 exp.HistoricalData, this=this, kind=kind, expression=expression 3808 ) 3809 else: 3810 self._retreat(index) 3811 3812 return historical_data 3813 3814 def _parse_changes(self) -> t.Optional[exp.Changes]: 3815 if not 
self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3816 return None 3817 3818 information = self._parse_var(any_token=True) 3819 self._match_r_paren() 3820 3821 return self.expression( 3822 exp.Changes, 3823 information=information, 3824 at_before=self._parse_historical_data(), 3825 end=self._parse_historical_data(), 3826 ) 3827 3828 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3829 if not self._match(TokenType.UNNEST): 3830 return None 3831 3832 expressions = self._parse_wrapped_csv(self._parse_equality) 3833 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3834 3835 alias = self._parse_table_alias() if with_alias else None 3836 3837 if alias: 3838 if self.dialect.UNNEST_COLUMN_ONLY: 3839 if alias.args.get("columns"): 3840 self.raise_error("Unexpected extra column alias in unnest.") 3841 3842 alias.set("columns", [alias.this]) 3843 alias.set("this", None) 3844 3845 columns = alias.args.get("columns") or [] 3846 if offset and len(expressions) < len(columns): 3847 offset = columns.pop() 3848 3849 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3850 self._match(TokenType.ALIAS) 3851 offset = self._parse_id_var( 3852 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3853 ) or exp.to_identifier("offset") 3854 3855 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3856 3857 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3858 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3859 if not is_derived and not ( 3860 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3861 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3862 ): 3863 return None 3864 3865 expressions = self._parse_csv(self._parse_value) 3866 alias = self._parse_table_alias() 3867 3868 if is_derived: 3869 self._match_r_paren() 3870 3871 return self.expression( 3872 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3873 ) 3874 3875 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3876 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3877 as_modifier and self._match_text_seq("USING", "SAMPLE") 3878 ): 3879 return None 3880 3881 bucket_numerator = None 3882 bucket_denominator = None 3883 bucket_field = None 3884 percent = None 3885 size = None 3886 seed = None 3887 3888 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3889 matched_l_paren = self._match(TokenType.L_PAREN) 3890 3891 if self.TABLESAMPLE_CSV: 3892 num = None 3893 expressions = self._parse_csv(self._parse_primary) 3894 else: 3895 expressions = None 3896 num = ( 3897 self._parse_factor() 3898 if self._match(TokenType.NUMBER, advance=False) 3899 else self._parse_primary() or self._parse_placeholder() 3900 ) 3901 3902 if self._match_text_seq("BUCKET"): 3903 bucket_numerator = self._parse_number() 3904 self._match_text_seq("OUT", "OF") 3905 bucket_denominator = self._parse_number() 3906 self._match(TokenType.ON) 3907 bucket_field = self._parse_field() 3908 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3909 percent = num 3910 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3911 size = num 3912 else: 3913 percent = num 3914 3915 if matched_l_paren: 3916 self._match_r_paren() 3917 3918 if self._match(TokenType.L_PAREN): 3919 method = self._parse_var(upper=True) 3920 seed = self._match(TokenType.COMMA) and self._parse_number() 3921
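# e.g. DuckDB's "SELECT * FROM t USING SAMPLE 10% (system, 377)", where the
# trailing parenthesized pair supplies the sampling method and seed
# (illustrative)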
self._match_r_paren() 3922 elif self._match_texts(("SEED", "REPEATABLE")): 3923 seed = self._parse_wrapped(self._parse_number) 3924 3925 if not method and self.DEFAULT_SAMPLING_METHOD: 3926 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3927 3928 return self.expression( 3929 exp.TableSample, 3930 expressions=expressions, 3931 method=method, 3932 bucket_numerator=bucket_numerator, 3933 bucket_denominator=bucket_denominator, 3934 bucket_field=bucket_field, 3935 percent=percent, 3936 size=size, 3937 seed=seed, 3938 ) 3939 3940 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3941 return list(iter(self._parse_pivot, None)) or None 3942 3943 def _parse_joins(self) -> t.Iterator[exp.Join]: 3944 return iter(self._parse_join, None) 3945 3946 # https://duckdb.org/docs/sql/statements/pivot 3947 def _parse_simplified_pivot(self) -> exp.Pivot: 3948 def _parse_on() -> t.Optional[exp.Expression]: 3949 this = self._parse_bitwise() 3950 return self._parse_in(this) if self._match(TokenType.IN) else this 3951 3952 this = self._parse_table() 3953 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3954 using = self._match(TokenType.USING) and self._parse_csv( 3955 lambda: self._parse_alias(self._parse_function()) 3956 ) 3957 group = self._parse_group() 3958 return self.expression( 3959 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3960 ) 3961 3962 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3963 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3964 this = self._parse_select_or_expression() 3965 3966 self._match(TokenType.ALIAS) 3967 alias = self._parse_bitwise() 3968 if alias: 3969 if isinstance(alias, exp.Column) and not alias.db: 3970 alias = alias.this 3971 return self.expression(exp.PivotAlias, this=this, alias=alias) 3972 3973 return this 3974 3975 value = self._parse_column() 3976 3977 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3978 self.raise_error("Expecting IN (") 3979 3980 if self._match(TokenType.ANY): 3981 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3982 else: 3983 exprs = self._parse_csv(_parse_aliased_expression) 3984 3985 self._match_r_paren() 3986 return self.expression(exp.In, this=value, expressions=exprs) 3987 3988 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3989 index = self._index 3990 include_nulls = None 3991 3992 if self._match(TokenType.PIVOT): 3993 unpivot = False 3994 elif self._match(TokenType.UNPIVOT): 3995 unpivot = True 3996 3997 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3998 if self._match_text_seq("INCLUDE", "NULLS"): 3999 include_nulls = True 4000 elif self._match_text_seq("EXCLUDE", "NULLS"): 4001 include_nulls = False 4002 else: 4003 return None 4004 4005 expressions = [] 4006 4007 if not self._match(TokenType.L_PAREN): 4008 self._retreat(index) 4009 return None 4010 4011 if unpivot: 4012 expressions = self._parse_csv(self._parse_column) 4013 else: 4014 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4015 4016 if not expressions: 4017 self.raise_error("Failed to parse PIVOT's aggregation list") 4018 4019 if not self._match(TokenType.FOR): 4020 self.raise_error("Expecting FOR") 4021 4022 field = self._parse_pivot_in() 4023 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4024 self._parse_bitwise 4025 ) 4026 4027 self._match_r_paren() 4028 4029 pivot = self.expression( 4030 exp.Pivot, 4031 expressions=expressions, 
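# e.g. for Snowflake's "PIVOT(SUM(v) FOR k IN ('a', 'b'))", expressions holds
# the SUM(v) aggregation and field the parsed FOR ... IN clause (illustrative)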
4032 field=field, 4033 unpivot=unpivot, 4034 include_nulls=include_nulls, 4035 default_on_null=default_on_null, 4036 ) 4037 4038 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4039 pivot.set("alias", self._parse_table_alias()) 4040 4041 if not unpivot: 4042 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4043 4044 columns: t.List[exp.Expression] = [] 4045 for fld in pivot.args["field"].expressions: 4046 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4047 for name in names: 4048 if self.PREFIXED_PIVOT_COLUMNS: 4049 name = f"{name}_{field_name}" if name else field_name 4050 else: 4051 name = f"{field_name}_{name}" if name else field_name 4052 4053 columns.append(exp.to_identifier(name)) 4054 4055 pivot.set("columns", columns) 4056 4057 return pivot 4058 4059 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4060 return [agg.alias for agg in aggregations] 4061 4062 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4063 if not skip_where_token and not self._match(TokenType.PREWHERE): 4064 return None 4065 4066 return self.expression( 4067 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4068 ) 4069 4070 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4071 if not skip_where_token and not self._match(TokenType.WHERE): 4072 return None 4073 4074 return self.expression( 4075 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4076 ) 4077 4078 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4079 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4080 return None 4081 4082 elements: t.Dict[str, t.Any] = defaultdict(list) 4083 4084 if self._match(TokenType.ALL): 4085 elements["all"] = True 4086 elif self._match(TokenType.DISTINCT): 4087 elements["all"] = False 4088 4089 while True: 4090 index = self._index 4091 4092 elements["expressions"].extend( 4093 self._parse_csv( 4094 lambda: None 4095 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4096 else self._parse_assignment() 4097 ) 4098 ) 4099 4100 before_with_index = self._index 4101 with_prefix = self._match(TokenType.WITH) 4102 4103 if self._match(TokenType.ROLLUP): 4104 elements["rollup"].append( 4105 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4106 ) 4107 elif self._match(TokenType.CUBE): 4108 elements["cube"].append( 4109 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4110 ) 4111 elif self._match(TokenType.GROUPING_SETS): 4112 elements["grouping_sets"].append( 4113 self.expression( 4114 exp.GroupingSets, 4115 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4116 ) 4117 ) 4118 elif self._match_text_seq("TOTALS"): 4119 elements["totals"] = True # type: ignore 4120 4121 if before_with_index <= self._index <= before_with_index + 1: 4122 self._retreat(before_with_index) 4123 break 4124 4125 if index == self._index: 4126 break 4127 4128 return self.expression(exp.Group, **elements) # type: ignore 4129 4130 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4131 return self.expression( 4132 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4133 ) 4134 4135 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4136 if self._match(TokenType.L_PAREN): 4137 grouping_set = self._parse_csv(self._parse_column) 4138 
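# e.g. in "GROUP BY GROUPING SETS ((a, b), c)", the parenthesized group
# (a, b) is parsed here into an exp.Tuple, while the bare column c takes the
# fallback below (illustrative)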
self._match_r_paren() 4139 return self.expression(exp.Tuple, expressions=grouping_set) 4140 4141 return self._parse_column() 4142 4143 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4144 if not skip_having_token and not self._match(TokenType.HAVING): 4145 return None 4146 return self.expression(exp.Having, this=self._parse_assignment()) 4147 4148 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4149 if not self._match(TokenType.QUALIFY): 4150 return None 4151 return self.expression(exp.Qualify, this=self._parse_assignment()) 4152 4153 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4154 if skip_start_token: 4155 start = None 4156 elif self._match(TokenType.START_WITH): 4157 start = self._parse_assignment() 4158 else: 4159 return None 4160 4161 self._match(TokenType.CONNECT_BY) 4162 nocycle = self._match_text_seq("NOCYCLE") 4163 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4164 exp.Prior, this=self._parse_bitwise() 4165 ) 4166 connect = self._parse_assignment() 4167 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4168 4169 if not start and self._match(TokenType.START_WITH): 4170 start = self._parse_assignment() 4171 4172 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4173 4174 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4175 this = self._parse_id_var(any_token=True) 4176 if self._match(TokenType.ALIAS): 4177 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4178 return this 4179 4180 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4181 if self._match_text_seq("INTERPOLATE"): 4182 return self._parse_wrapped_csv(self._parse_name_as_expression) 4183 return None 4184 4185 def _parse_order( 4186 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4187 ) -> t.Optional[exp.Expression]: 4188 siblings = None 4189 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4190 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4191 return this 4192 4193 siblings = True 4194 4195 return self.expression( 4196 exp.Order, 4197 this=this, 4198 expressions=self._parse_csv(self._parse_ordered), 4199 siblings=siblings, 4200 ) 4201 4202 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4203 if not self._match(token): 4204 return None 4205 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4206 4207 def _parse_ordered( 4208 self, parse_method: t.Optional[t.Callable] = None 4209 ) -> t.Optional[exp.Ordered]: 4210 this = parse_method() if parse_method else self._parse_assignment() 4211 if not this: 4212 return None 4213 4214 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4215 this = exp.var("ALL") 4216 4217 asc = self._match(TokenType.ASC) 4218 desc = self._match(TokenType.DESC) or (asc and False) 4219 4220 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4221 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4222 4223 nulls_first = is_nulls_first or False 4224 explicitly_null_ordered = is_nulls_first or is_nulls_last 4225 4226 if ( 4227 not explicitly_null_ordered 4228 and ( 4229 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4230 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4231 ) 4232 and self.dialect.NULL_ORDERING != "nulls_are_last" 4233 ): 4234 nulls_first = True 4235 4236 if self._match_text_seq("WITH", "FILL"): 4237 with_fill = self.expression( 
4238 exp.WithFill, 4239 **{ # type: ignore 4240 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4241 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4242 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4243 "interpolate": self._parse_interpolate(), 4244 }, 4245 ) 4246 else: 4247 with_fill = None 4248 4249 return self.expression( 4250 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4251 ) 4252 4253 def _parse_limit( 4254 self, 4255 this: t.Optional[exp.Expression] = None, 4256 top: bool = False, 4257 skip_limit_token: bool = False, 4258 ) -> t.Optional[exp.Expression]: 4259 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4260 comments = self._prev_comments 4261 if top: 4262 limit_paren = self._match(TokenType.L_PAREN) 4263 expression = self._parse_term() if limit_paren else self._parse_number() 4264 4265 if limit_paren: 4266 self._match_r_paren() 4267 else: 4268 expression = self._parse_term() 4269 4270 if self._match(TokenType.COMMA): 4271 offset = expression 4272 expression = self._parse_term() 4273 else: 4274 offset = None 4275 4276 limit_exp = self.expression( 4277 exp.Limit, 4278 this=this, 4279 expression=expression, 4280 offset=offset, 4281 comments=comments, 4282 expressions=self._parse_limit_by(), 4283 ) 4284 4285 return limit_exp 4286 4287 if self._match(TokenType.FETCH): 4288 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4289 direction = self._prev.text.upper() if direction else "FIRST" 4290 4291 count = self._parse_field(tokens=self.FETCH_TOKENS) 4292 percent = self._match(TokenType.PERCENT) 4293 4294 self._match_set((TokenType.ROW, TokenType.ROWS)) 4295 4296 only = self._match_text_seq("ONLY") 4297 with_ties = self._match_text_seq("WITH", "TIES") 4298 4299 if only and with_ties: 4300 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4301 4302 return self.expression( 4303 exp.Fetch, 4304 direction=direction, 4305 count=count, 4306 percent=percent, 4307 with_ties=with_ties, 4308 ) 4309 4310 return this 4311 4312 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4313 if not self._match(TokenType.OFFSET): 4314 return this 4315 4316 count = self._parse_term() 4317 self._match_set((TokenType.ROW, TokenType.ROWS)) 4318 4319 return self.expression( 4320 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4321 ) 4322 4323 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4324 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4325 4326 def _parse_locks(self) -> t.List[exp.Lock]: 4327 locks = [] 4328 while True: 4329 if self._match_text_seq("FOR", "UPDATE"): 4330 update = True 4331 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4332 "LOCK", "IN", "SHARE", "MODE" 4333 ): 4334 update = False 4335 else: 4336 break 4337 4338 expressions = None 4339 if self._match_text_seq("OF"): 4340 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4341 4342 wait: t.Optional[bool | exp.Expression] = None 4343 if self._match_text_seq("NOWAIT"): 4344 wait = True 4345 elif self._match_text_seq("WAIT"): 4346 wait = self._parse_primary() 4347 elif self._match_text_seq("SKIP", "LOCKED"): 4348 wait = False 4349 4350 locks.append( 4351 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4352 ) 4353 4354 return locks 4355 4356 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 4357 while this and self._match_set(self.SET_OPERATIONS): 4358 token_type = self._prev.token_type 4359 4360 if token_type == TokenType.UNION: 4361 operation: t.Type[exp.SetOperation] = exp.Union 4362 elif token_type == TokenType.EXCEPT: 4363 operation = exp.Except 4364 else: 4365 operation = exp.Intersect 4366 4367 comments = self._prev.comments 4368 4369 if self._match(TokenType.DISTINCT): 4370 distinct: t.Optional[bool] = True 4371 elif self._match(TokenType.ALL): 4372 distinct = False 4373 else: 4374 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4375 if distinct is None: 4376 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4377 4378 by_name = self._match_text_seq("BY", "NAME") 4379 expression = self._parse_select(nested=True, parse_set_operation=False) 4380 4381 this = self.expression( 4382 operation, 4383 comments=comments, 4384 this=this, 4385 distinct=distinct, 4386 by_name=by_name, 4387 expression=expression, 4388 ) 4389 4390 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4391 expression = this.expression 4392 4393 if expression: 4394 for arg in self.SET_OP_MODIFIERS: 4395 expr = expression.args.get(arg) 4396 if expr: 4397 this.set(arg, expr.pop()) 4398 4399 return this 4400 4401 def _parse_expression(self) -> t.Optional[exp.Expression]: 4402 return self._parse_alias(self._parse_assignment()) 4403 4404 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4405 this = self._parse_disjunction() 4406 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4407 # This allows us to parse <non-identifier token> := <expr> 4408 this = exp.column( 4409 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4410 ) 4411 4412 while self._match_set(self.ASSIGNMENT): 4413 if isinstance(this, exp.Column) and len(this.parts) == 1: 4414 this = this.this 4415 4416 this = self.expression( 4417 self.ASSIGNMENT[self._prev.token_type], 4418 this=this, 4419 comments=self._prev_comments, 4420 expression=self._parse_assignment(), 4421 ) 4422 4423 return this 4424 4425 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4426 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4427 4428 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4429 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4430 4431 def _parse_equality(self) -> t.Optional[exp.Expression]: 4432 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4433 4434 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4435 return self._parse_tokens(self._parse_range, self.COMPARISON) 4436 4437 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4438 this = this or self._parse_bitwise() 4439 negate = self._match(TokenType.NOT) 4440 4441 if self._match_set(self.RANGE_PARSERS): 4442 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4443 if not expression: 4444 return this 4445 4446 this = expression 4447 elif self._match(TokenType.ISNULL): 4448 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4449 4450 # Postgres supports ISNULL and NOTNULL for conditions. 
4451 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4452 if self._match(TokenType.NOTNULL): 4453 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4454 this = self.expression(exp.Not, this=this) 4455 4456 if negate: 4457 this = self._negate_range(this) 4458 4459 if self._match(TokenType.IS): 4460 this = self._parse_is(this) 4461 4462 return this 4463 4464 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4465 if not this: 4466 return this 4467 4468 return self.expression(exp.Not, this=this) 4469 4470 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4471 index = self._index - 1 4472 negate = self._match(TokenType.NOT) 4473 4474 if self._match_text_seq("DISTINCT", "FROM"): 4475 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4476 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4477 4478 if self._match(TokenType.JSON): 4479 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4480 4481 if self._match_text_seq("WITH"): 4482 _with = True 4483 elif self._match_text_seq("WITHOUT"): 4484 _with = False 4485 else: 4486 _with = None 4487 4488 unique = self._match(TokenType.UNIQUE) 4489 self._match_text_seq("KEYS") 4490 expression: t.Optional[exp.Expression] = self.expression( 4491 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4492 ) 4493 else: 4494 expression = self._parse_primary() or self._parse_null() 4495 if not expression: 4496 self._retreat(index) 4497 return None 4498 4499 this = self.expression(exp.Is, this=this, expression=expression) 4500 return self.expression(exp.Not, this=this) if negate else this 4501 4502 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4503 unnest = self._parse_unnest(with_alias=False) 4504 if unnest: 4505 this = self.expression(exp.In, this=this, unnest=unnest) 4506 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4507 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4508 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4509 4510 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4511 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4512 else: 4513 this = self.expression(exp.In, this=this, expressions=expressions) 4514 4515 if matched_l_paren: 4516 self._match_r_paren(this) 4517 elif not self._match(TokenType.R_BRACKET, expression=this): 4518 self.raise_error("Expecting ]") 4519 else: 4520 this = self.expression(exp.In, this=this, field=self._parse_column()) 4521 4522 return this 4523 4524 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4525 low = self._parse_bitwise() 4526 self._match(TokenType.AND) 4527 high = self._parse_bitwise() 4528 return self.expression(exp.Between, this=this, low=low, high=high) 4529 4530 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4531 if not self._match(TokenType.ESCAPE): 4532 return this 4533 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4534 4535 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4536 index = self._index 4537 4538 if not self._match(TokenType.INTERVAL) and match_interval: 4539 return None 4540 4541 if self._match(TokenType.STRING, advance=False): 4542 this = self._parse_primary() 4543 else: 4544 this = self._parse_term() 4545 4546 if not 
this or ( 4547 isinstance(this, exp.Column) 4548 and not this.table 4549 and not this.this.quoted 4550 and this.name.upper() == "IS" 4551 ): 4552 self._retreat(index) 4553 return None 4554 4555 unit = self._parse_function() or ( 4556 not self._match(TokenType.ALIAS, advance=False) 4557 and self._parse_var(any_token=True, upper=True) 4558 ) 4559 4560 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4561 # each INTERVAL expression into this canonical form so it's easy to transpile 4562 if this and this.is_number: 4563 this = exp.Literal.string(this.to_py()) 4564 elif this and this.is_string: 4565 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4566 if len(parts) == 1: 4567 if unit: 4568 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4569 self._retreat(self._index - 1) 4570 4571 this = exp.Literal.string(parts[0][0]) 4572 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4573 4574 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4575 unit = self.expression( 4576 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4577 ) 4578 4579 interval = self.expression(exp.Interval, this=this, unit=unit) 4580 4581 index = self._index 4582 self._match(TokenType.PLUS) 4583 4584 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4585 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4586 return self.expression( 4587 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4588 ) 4589 4590 self._retreat(index) 4591 return interval 4592 4593 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4594 this = self._parse_term() 4595 4596 while True: 4597 if self._match_set(self.BITWISE): 4598 this = self.expression( 4599 self.BITWISE[self._prev.token_type], 4600 this=this, 4601 expression=self._parse_term(), 4602 ) 4603 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4604 this = self.expression( 4605 exp.DPipe, 4606 this=this, 4607 expression=self._parse_term(), 4608 safe=not self.dialect.STRICT_STRING_CONCAT, 4609 ) 4610 elif self._match(TokenType.DQMARK): 4611 this = self.expression( 4612 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4613 ) 4614 elif self._match_pair(TokenType.LT, TokenType.LT): 4615 this = self.expression( 4616 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4617 ) 4618 elif self._match_pair(TokenType.GT, TokenType.GT): 4619 this = self.expression( 4620 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4621 ) 4622 else: 4623 break 4624 4625 return this 4626 4627 def _parse_term(self) -> t.Optional[exp.Expression]: 4628 this = self._parse_factor() 4629 4630 while self._match_set(self.TERM): 4631 klass = self.TERM[self._prev.token_type] 4632 comments = self._prev_comments 4633 expression = self._parse_factor() 4634 4635 this = self.expression(klass, this=this, comments=comments, expression=expression) 4636 4637 if isinstance(this, exp.Collate): 4638 expr = this.expression 4639 4640 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4641 # fallback to Identifier / Var 4642 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4643 ident = expr.this 4644 if isinstance(ident, exp.Identifier): 4645 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4646 4647 return this 4648 4649 def _parse_factor(self) -> t.Optional[exp.Expression]: 4650 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary 4651 this = parse_method() 4652 4653 while self._match_set(self.FACTOR): 4654 klass = self.FACTOR[self._prev.token_type] 4655 comments = self._prev_comments 4656 expression = parse_method() 4657 4658 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4659 self._retreat(self._index - 1) 4660 return this 4661 4662 this = self.expression(klass, this=this, comments=comments, expression=expression) 4663 4664 if isinstance(this, exp.Div): 4665 this.args["typed"] = self.dialect.TYPED_DIVISION 4666 this.args["safe"] = self.dialect.SAFE_DIVISION 4667 4668 return this 4669 4670 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4671 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4672 4673 def _parse_unary(self) -> t.Optional[exp.Expression]: 4674 if self._match_set(self.UNARY_PARSERS): 4675 return self.UNARY_PARSERS[self._prev.token_type](self) 4676 return self._parse_at_time_zone(self._parse_type()) 4677 4678 def _parse_type( 4679 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4680 ) -> t.Optional[exp.Expression]: 4681 interval = parse_interval and self._parse_interval() 4682 if interval: 4683 return interval 4684 4685 index = self._index 4686 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4687 4688 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4689 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4690 if isinstance(data_type, exp.Cast): 4691 # This constructor can contain ops directly after it, for instance struct unnesting: 4692 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4693 return self._parse_column_ops(data_type) 4694 4695 if data_type: 4696 index2 = self._index 4697 this = self._parse_primary() 4698 4699 if isinstance(this, exp.Literal): 4700 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4701 if parser: 4702 return parser(self, this, data_type) 4703 4704 return self.expression(exp.Cast, this=this, to=data_type) 4705 4706 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4707 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4708 # 4709 # If the index difference here is greater than 1, that means the parser itself must have 4710 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4711 # 4712 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4713 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4714 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4715 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4716 # 4717 # In these cases, we don't really want to return the converted type, but instead retreat 4718 # and try to parse a Column or Identifier in the section below.
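# A minimal doctest-style sketch of that Snowflake behavior (assuming the
# dialect's default TYPE_CONVERTERS mapping):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT x::DECIMAL", read="snowflake").sql("snowflake")
#     'SELECT CAST(x AS DECIMAL(38, 0))'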
4719 if data_type.expressions and index2 - index > 1: 4720 self._retreat(index2) 4721 return self._parse_column_ops(data_type) 4722 4723 self._retreat(index) 4724 4725 if fallback_to_identifier: 4726 return self._parse_id_var() 4727 4728 this = self._parse_column() 4729 return this and self._parse_column_ops(this) 4730 4731 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4732 this = self._parse_type() 4733 if not this: 4734 return None 4735 4736 if isinstance(this, exp.Column) and not this.table: 4737 this = exp.var(this.name.upper()) 4738 4739 return self.expression( 4740 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4741 ) 4742 4743 def _parse_types( 4744 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4745 ) -> t.Optional[exp.Expression]: 4746 index = self._index 4747 4748 this: t.Optional[exp.Expression] = None 4749 prefix = self._match_text_seq("SYSUDTLIB", ".") 4750 4751 if not self._match_set(self.TYPE_TOKENS): 4752 identifier = allow_identifiers and self._parse_id_var( 4753 any_token=False, tokens=(TokenType.VAR,) 4754 ) 4755 if isinstance(identifier, exp.Identifier): 4756 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4757 4758 if len(tokens) != 1: 4759 self.raise_error("Unexpected identifier", self._prev) 4760 4761 if tokens[0].token_type in self.TYPE_TOKENS: 4762 self._prev = tokens[0] 4763 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4764 type_name = identifier.name 4765 4766 while self._match(TokenType.DOT): 4767 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4768 4769 this = exp.DataType.build(type_name, udt=True) 4770 else: 4771 self._retreat(self._index - 1) 4772 return None 4773 else: 4774 return None 4775 4776 type_token = self._prev.token_type 4777 4778 if type_token == TokenType.PSEUDO_TYPE: 4779 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4780 4781 if type_token == TokenType.OBJECT_IDENTIFIER: 4782 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4783 4784 # https://materialize.com/docs/sql/types/map/ 4785 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4786 key_type = self._parse_types( 4787 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4788 ) 4789 if not self._match(TokenType.FARROW): 4790 self._retreat(index) 4791 return None 4792 4793 value_type = self._parse_types( 4794 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4795 ) 4796 if not self._match(TokenType.R_BRACKET): 4797 self._retreat(index) 4798 return None 4799 4800 return exp.DataType( 4801 this=exp.DataType.Type.MAP, 4802 expressions=[key_type, value_type], 4803 nested=True, 4804 prefix=prefix, 4805 ) 4806 4807 nested = type_token in self.NESTED_TYPE_TOKENS 4808 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4809 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4810 expressions = None 4811 maybe_func = False 4812 4813 if self._match(TokenType.L_PAREN): 4814 if is_struct: 4815 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4816 elif nested: 4817 expressions = self._parse_csv( 4818 lambda: self._parse_types( 4819 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4820 ) 4821 ) 4822 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4823 this = expressions[0] 4824 this.set("nullable", True) 4825 self._match_r_paren() 4826 return this 4827 elif type_token in self.ENUM_TYPE_TOKENS: 4828 
expressions = self._parse_csv(self._parse_equality) 4829 elif is_aggregate: 4830 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4831 any_token=False, tokens=(TokenType.VAR,) 4832 ) 4833 if not func_or_ident or not self._match(TokenType.COMMA): 4834 return None 4835 expressions = self._parse_csv( 4836 lambda: self._parse_types( 4837 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4838 ) 4839 ) 4840 expressions.insert(0, func_or_ident) 4841 else: 4842 expressions = self._parse_csv(self._parse_type_size) 4843 4844 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4845 if type_token == TokenType.VECTOR and len(expressions) == 2: 4846 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4847 4848 if not expressions or not self._match(TokenType.R_PAREN): 4849 self._retreat(index) 4850 return None 4851 4852 maybe_func = True 4853 4854 values: t.Optional[t.List[exp.Expression]] = None 4855 4856 if nested and self._match(TokenType.LT): 4857 if is_struct: 4858 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4859 else: 4860 expressions = self._parse_csv( 4861 lambda: self._parse_types( 4862 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4863 ) 4864 ) 4865 4866 if not self._match(TokenType.GT): 4867 self.raise_error("Expecting >") 4868 4869 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4870 values = self._parse_csv(self._parse_assignment) 4871 if not values and is_struct: 4872 values = None 4873 self._retreat(self._index - 1) 4874 else: 4875 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4876 4877 if type_token in self.TIMESTAMPS: 4878 if self._match_text_seq("WITH", "TIME", "ZONE"): 4879 maybe_func = False 4880 tz_type = ( 4881 exp.DataType.Type.TIMETZ 4882 if type_token in self.TIMES 4883 else exp.DataType.Type.TIMESTAMPTZ 4884 ) 4885 this = exp.DataType(this=tz_type, expressions=expressions) 4886 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4887 maybe_func = False 4888 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4889 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4890 maybe_func = False 4891 elif type_token == TokenType.INTERVAL: 4892 unit = self._parse_var(upper=True) 4893 if unit: 4894 if self._match_text_seq("TO"): 4895 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4896 4897 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4898 else: 4899 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4900 4901 if maybe_func and check_func: 4902 index2 = self._index 4903 peek = self._parse_string() 4904 4905 if not peek: 4906 self._retreat(index) 4907 return None 4908 4909 self._retreat(index2) 4910 4911 if not this: 4912 if self._match_text_seq("UNSIGNED"): 4913 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4914 if not unsigned_type_token: 4915 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4916 4917 type_token = unsigned_type_token or type_token 4918 4919 this = exp.DataType( 4920 this=exp.DataType.Type[type_token.value], 4921 expressions=expressions, 4922 nested=nested, 4923 prefix=prefix, 4924 ) 4925 4926 # Empty arrays/structs are allowed 4927 if values is not None: 4928 cls = exp.Struct if is_struct else exp.Array 4929 this = exp.cast(cls(expressions=values), this, copy=False) 4930 4931 elif expressions: 4932 this.set("expressions", 
expressions) 4933 4934 # https://materialize.com/docs/sql/types/list/#type-name 4935 while self._match(TokenType.LIST): 4936 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4937 4938 index = self._index 4939 4940 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4941 matched_array = self._match(TokenType.ARRAY) 4942 4943 while self._curr: 4944 datatype_token = self._prev.token_type 4945 matched_l_bracket = self._match(TokenType.L_BRACKET) 4946 if not matched_l_bracket and not matched_array: 4947 break 4948 4949 matched_array = False 4950 values = self._parse_csv(self._parse_assignment) or None 4951 if ( 4952 values 4953 and not schema 4954 and ( 4955 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4956 ) 4957 ): 4958 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4959 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4960 self._retreat(index) 4961 break 4962 4963 this = exp.DataType( 4964 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4965 ) 4966 self._match(TokenType.R_BRACKET) 4967 4968 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4969 converter = self.TYPE_CONVERTERS.get(this.this) 4970 if converter: 4971 this = converter(t.cast(exp.DataType, this)) 4972 4973 return this 4974 4975 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4976 index = self._index 4977 4978 if ( 4979 self._curr 4980 and self._next 4981 and self._curr.token_type in self.TYPE_TOKENS 4982 and self._next.token_type in self.TYPE_TOKENS 4983 ): 4984 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4985 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4986 this = self._parse_id_var() 4987 else: 4988 this = ( 4989 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4990 or self._parse_id_var() 4991 ) 4992 4993 self._match(TokenType.COLON) 4994 4995 if ( 4996 type_required 4997 and not isinstance(this, exp.DataType) 4998 and not self._match_set(self.TYPE_TOKENS, advance=False) 4999 ): 5000 self._retreat(index) 5001 return self._parse_types() 5002 5003 return self._parse_column_def(this) 5004 5005 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5006 if not self._match_text_seq("AT", "TIME", "ZONE"): 5007 return this 5008 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5009 5010 def _parse_column(self) -> t.Optional[exp.Expression]: 5011 this = self._parse_column_reference() 5012 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5013 5014 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5015 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5016 5017 return column 5018 5019 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5020 this = self._parse_field() 5021 if ( 5022 not this 5023 and self._match(TokenType.VALUES, advance=False) 5024 and self.VALUES_FOLLOWED_BY_PAREN 5025 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5026 ): 5027 this = self._parse_id_var() 5028 5029 if isinstance(this, exp.Identifier): 5030 # We bubble up comments from the Identifier to the Column 5031 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5032 5033 return this 5034 5035 def _parse_colon_as_variant_extract( 5036 self, this: t.Optional[exp.Expression] 5037 ) -> t.Optional[exp.Expression]: 5038 casts = [] 5039 json_path = [] 5040 escape = None 5041 5042 while self._match(TokenType.COLON): 5043 start_index = self._index 5044 5045 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5046 path = self._parse_column_ops( 5047 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5048 ) 5049 5050 # The cast :: operator has a lower precedence than the extraction operator :, so 5051 # we rearrange the AST appropriately to avoid casting the JSON path 5052 while isinstance(path, exp.Cast): 5053 casts.append(path.to) 5054 path = path.this 5055 5056 if casts: 5057 dcolon_offset = next( 5058 i 5059 for i, t in enumerate(self._tokens[start_index:]) 5060 if t.token_type == TokenType.DCOLON 5061 ) 5062 end_token = self._tokens[start_index + dcolon_offset - 1] 5063 else: 5064 end_token = self._prev 5065 5066 if path: 5067 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5068 # it'll roundtrip to a string literal in GET_PATH 5069 if isinstance(path, exp.Identifier) and path.quoted: 5070 escape = True 5071 5072 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5073 5074 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5075 # Databricks transforms it back to the colon/dot notation 5076 if json_path: 5077 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5078 5079 if json_path_expr: 5080 json_path_expr.set("escape", escape) 5081 5082 this = self.expression( 5083 exp.JSONExtract, 5084 this=this, 5085 expression=json_path_expr, 5086 variant_extract=True, 5087 ) 5088 5089 while casts: 5090 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5091 5092 return this 5093 5094 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5095 return self._parse_types() 5096 5097 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5098 this = self._parse_bracket(this) 5099 5100 while self._match_set(self.COLUMN_OPERATORS): 5101 op_token = self._prev.token_type 5102 op = self.COLUMN_OPERATORS.get(op_token) 5103 5104 if op_token == TokenType.DCOLON: 5105 field = self._parse_dcolon() 5106 if not field: 5107 self.raise_error("Expected type") 5108 elif op and self._curr: 5109 field = self._parse_column_reference() or self._parse_bracket() 5110 else: 5111 field = self._parse_field(any_token=True, anonymous_func=True) 5112 5113 if isinstance(field, exp.Func) and this: 5114 # bigquery allows function calls like x.y.count(...) 5115 # SAFE.SUBSTR(...) 5116 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5117 this = exp.replace_tree( 5118 this, 5119 lambda n: ( 5120 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5121 if n.table 5122 else n.this 5123 ) 5124 if isinstance(n, exp.Column) 5125 else n, 5126 ) 5127 5128 if op: 5129 this = op(self, this, field) 5130 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5131 this = self.expression( 5132 exp.Column, 5133 comments=this.comments, 5134 this=field, 5135 table=this.this, 5136 db=this.args.get("table"), 5137 catalog=this.args.get("db"), 5138 ) 5139 else: 5140 this = self.expression(exp.Dot, this=this, expression=field) 5141 5142 this = self._parse_bracket(this) 5143 5144 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5145 5146 def _parse_primary(self) -> t.Optional[exp.Expression]: 5147 if self._match_set(self.PRIMARY_PARSERS): 5148 token_type = self._prev.token_type 5149 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5150 5151 if token_type == TokenType.STRING: 5152 expressions = [primary] 5153 while self._match(TokenType.STRING): 5154 expressions.append(exp.Literal.string(self._prev.text)) 5155 5156 if len(expressions) > 1: 5157 return self.expression(exp.Concat, expressions=expressions) 5158 5159 return primary 5160 5161 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5162 return exp.Literal.number(f"0.{self._prev.text}") 5163 5164 if self._match(TokenType.L_PAREN): 5165 comments = self._prev_comments 5166 query = self._parse_select() 5167 5168 if query: 5169 expressions = [query] 5170 else: 5171 expressions = self._parse_expressions() 5172 5173 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5174 5175 if not this and self._match(TokenType.R_PAREN, advance=False): 5176 this 
= self.expression(exp.Tuple) 5177 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5178 this = self._parse_subquery(this=this, parse_alias=False) 5179 elif isinstance(this, exp.Subquery): 5180 this = self._parse_subquery( 5181 this=self._parse_set_operations(this), parse_alias=False 5182 ) 5183 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5184 this = self.expression(exp.Tuple, expressions=expressions) 5185 else: 5186 this = self.expression(exp.Paren, this=this) 5187 5188 if this: 5189 this.add_comments(comments) 5190 5191 self._match_r_paren(expression=this) 5192 return this 5193 5194 return None 5195 5196 def _parse_field( 5197 self, 5198 any_token: bool = False, 5199 tokens: t.Optional[t.Collection[TokenType]] = None, 5200 anonymous_func: bool = False, 5201 ) -> t.Optional[exp.Expression]: 5202 if anonymous_func: 5203 field = ( 5204 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5205 or self._parse_primary() 5206 ) 5207 else: 5208 field = self._parse_primary() or self._parse_function( 5209 anonymous=anonymous_func, any_token=any_token 5210 ) 5211 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5212 5213 def _parse_function( 5214 self, 5215 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5216 anonymous: bool = False, 5217 optional_parens: bool = True, 5218 any_token: bool = False, 5219 ) -> t.Optional[exp.Expression]: 5220 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5221 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5222 fn_syntax = False 5223 if ( 5224 self._match(TokenType.L_BRACE, advance=False) 5225 and self._next 5226 and self._next.text.upper() == "FN" 5227 ): 5228 self._advance(2) 5229 fn_syntax = True 5230 5231 func = self._parse_function_call( 5232 functions=functions, 5233 anonymous=anonymous, 5234 optional_parens=optional_parens, 5235 any_token=any_token, 5236 ) 5237 5238 if fn_syntax: 5239 self._match(TokenType.R_BRACE) 5240 5241 return func 5242 5243 def _parse_function_call( 5244 self, 5245 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5246 anonymous: bool = False, 5247 optional_parens: bool = True, 5248 any_token: bool = False, 5249 ) -> t.Optional[exp.Expression]: 5250 if not self._curr: 5251 return None 5252 5253 comments = self._curr.comments 5254 token_type = self._curr.token_type 5255 this = self._curr.text 5256 upper = this.upper() 5257 5258 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5259 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5260 self._advance() 5261 return self._parse_window(parser(self)) 5262 5263 if not self._next or self._next.token_type != TokenType.L_PAREN: 5264 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5265 self._advance() 5266 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5267 5268 return None 5269 5270 if any_token: 5271 if token_type in self.RESERVED_TOKENS: 5272 return None 5273 elif token_type not in self.FUNC_TOKENS: 5274 return None 5275 5276 self._advance(2) 5277 5278 parser = self.FUNCTION_PARSERS.get(upper) 5279 if parser and not anonymous: 5280 this = parser(self) 5281 else: 5282 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5283 5284 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5285 this = self.expression( 5286 subquery_predicate, comments=comments, this=self._parse_select() 5287 ) 5288 self._match_r_paren() 5289 return this 5290 5291 if functions is None: 
5292 functions = self.FUNCTIONS 5293 5294 function = functions.get(upper) 5295 5296 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5297 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5298 5299 if alias: 5300 args = self._kv_to_prop_eq(args) 5301 5302 if function and not anonymous: 5303 if "dialect" in function.__code__.co_varnames: 5304 func = function(args, dialect=self.dialect) 5305 else: 5306 func = function(args) 5307 5308 func = self.validate_expression(func, args) 5309 if not self.dialect.NORMALIZE_FUNCTIONS: 5310 func.meta["name"] = this 5311 5312 this = func 5313 else: 5314 if token_type == TokenType.IDENTIFIER: 5315 this = exp.Identifier(this=this, quoted=True) 5316 this = self.expression(exp.Anonymous, this=this, expressions=args) 5317 5318 if isinstance(this, exp.Expression): 5319 this.add_comments(comments) 5320 5321 self._match_r_paren(this) 5322 return self._parse_window(this) 5323 5324 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5325 return expression 5326 5327 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5328 transformed = [] 5329 5330 for index, e in enumerate(expressions): 5331 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5332 if isinstance(e, exp.Alias): 5333 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5334 5335 if not isinstance(e, exp.PropertyEQ): 5336 e = self.expression( 5337 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5338 ) 5339 5340 if isinstance(e.this, exp.Column): 5341 e.this.replace(e.this.this) 5342 else: 5343 e = self._to_prop_eq(e, index) 5344 5345 transformed.append(e) 5346 5347 return transformed 5348 5349 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5350 return self._parse_statement() 5351 5352 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5353 return self._parse_column_def(self._parse_id_var()) 5354 5355 def _parse_user_defined_function( 5356 self, kind: t.Optional[TokenType] = None 5357 ) -> t.Optional[exp.Expression]: 5358 this = self._parse_id_var() 5359 5360 while self._match(TokenType.DOT): 5361 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5362 5363 if not self._match(TokenType.L_PAREN): 5364 return this 5365 5366 expressions = self._parse_csv(self._parse_function_parameter) 5367 self._match_r_paren() 5368 return self.expression( 5369 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5370 ) 5371 5372 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5373 literal = self._parse_primary() 5374 if literal: 5375 return self.expression(exp.Introducer, this=token.text, expression=literal) 5376 5377 return self.expression(exp.Identifier, this=token.text) 5378 5379 def _parse_session_parameter(self) -> exp.SessionParameter: 5380 kind = None 5381 this = self._parse_id_var() or self._parse_primary() 5382 5383 if this and self._match(TokenType.DOT): 5384 kind = this.name 5385 this = self._parse_var() or self._parse_primary() 5386 5387 return self.expression(exp.SessionParameter, this=this, kind=kind) 5388 5389 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5390 return self._parse_id_var() 5391 5392 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5393 index = self._index 5394 5395 if self._match(TokenType.L_PAREN): 5396 expressions = t.cast( 5397 t.List[t.Optional[exp.Expression]], 
self._parse_csv(self._parse_lambda_arg) 5398 ) 5399 5400 if not self._match(TokenType.R_PAREN): 5401 self._retreat(index) 5402 else: 5403 expressions = [self._parse_lambda_arg()] 5404 5405 if self._match_set(self.LAMBDAS): 5406 return self.LAMBDAS[self._prev.token_type](self, expressions) 5407 5408 self._retreat(index) 5409 5410 this: t.Optional[exp.Expression] 5411 5412 if self._match(TokenType.DISTINCT): 5413 this = self.expression( 5414 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5415 ) 5416 else: 5417 this = self._parse_select_or_expression(alias=alias) 5418 5419 return self._parse_limit( 5420 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5421 ) 5422 5423 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5424 index = self._index 5425 if not self._match(TokenType.L_PAREN): 5426 return this 5427 5428 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5429 # expr can be of both types 5430 if self._match_set(self.SELECT_START_TOKENS): 5431 self._retreat(index) 5432 return this 5433 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5434 self._match_r_paren() 5435 return self.expression(exp.Schema, this=this, expressions=args) 5436 5437 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5438 return self._parse_column_def(self._parse_field(any_token=True)) 5439 5440 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5441 # column defs are not really columns, they're identifiers 5442 if isinstance(this, exp.Column): 5443 this = this.this 5444 5445 kind = self._parse_types(schema=True) 5446 5447 if self._match_text_seq("FOR", "ORDINALITY"): 5448 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5449 5450 constraints: t.List[exp.Expression] = [] 5451 5452 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5453 ("ALIAS", "MATERIALIZED") 5454 ): 5455 persisted = self._prev.text.upper() == "MATERIALIZED" 5456 constraint_kind = exp.ComputedColumnConstraint( 5457 this=self._parse_assignment(), 5458 persisted=persisted or self._match_text_seq("PERSISTED"), 5459 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5460 ) 5461 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5462 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5463 self._match(TokenType.ALIAS) 5464 constraints.append( 5465 self.expression( 5466 exp.ColumnConstraint, 5467 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5468 ) 5469 ) 5470 5471 while True: 5472 constraint = self._parse_column_constraint() 5473 if not constraint: 5474 break 5475 constraints.append(constraint) 5476 5477 if not kind and not constraints: 5478 return this 5479 5480 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5481 5482 def _parse_auto_increment( 5483 self, 5484 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5485 start = None 5486 increment = None 5487 5488 if self._match(TokenType.L_PAREN, advance=False): 5489 args = self._parse_wrapped_csv(self._parse_bitwise) 5490 start = seq_get(args, 0) 5491 increment = seq_get(args, 1) 5492 elif self._match_text_seq("START"): 5493 start = self._parse_bitwise() 5494 self._match_text_seq("INCREMENT") 5495 increment = self._parse_bitwise() 5496 5497 if start and increment: 5498 return 
exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5499 5500 return exp.AutoIncrementColumnConstraint() 5501 5502 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5503 if not self._match_text_seq("REFRESH"): 5504 self._retreat(self._index - 1) 5505 return None 5506 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5507 5508 def _parse_compress(self) -> exp.CompressColumnConstraint: 5509 if self._match(TokenType.L_PAREN, advance=False): 5510 return self.expression( 5511 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5512 ) 5513 5514 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5515 5516 def _parse_generated_as_identity( 5517 self, 5518 ) -> ( 5519 exp.GeneratedAsIdentityColumnConstraint 5520 | exp.ComputedColumnConstraint 5521 | exp.GeneratedAsRowColumnConstraint 5522 ): 5523 if self._match_text_seq("BY", "DEFAULT"): 5524 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5525 this = self.expression( 5526 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5527 ) 5528 else: 5529 self._match_text_seq("ALWAYS") 5530 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5531 5532 self._match(TokenType.ALIAS) 5533 5534 if self._match_text_seq("ROW"): 5535 start = self._match_text_seq("START") 5536 if not start: 5537 self._match(TokenType.END) 5538 hidden = self._match_text_seq("HIDDEN") 5539 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5540 5541 identity = self._match_text_seq("IDENTITY") 5542 5543 if self._match(TokenType.L_PAREN): 5544 if self._match(TokenType.START_WITH): 5545 this.set("start", self._parse_bitwise()) 5546 if self._match_text_seq("INCREMENT", "BY"): 5547 this.set("increment", self._parse_bitwise()) 5548 if self._match_text_seq("MINVALUE"): 5549 this.set("minvalue", self._parse_bitwise()) 5550 if self._match_text_seq("MAXVALUE"): 5551 this.set("maxvalue", self._parse_bitwise()) 5552 5553 if self._match_text_seq("CYCLE"): 5554 this.set("cycle", True) 5555 elif self._match_text_seq("NO", "CYCLE"): 5556 this.set("cycle", False) 5557 5558 if not identity: 5559 this.set("expression", self._parse_range()) 5560 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5561 args = self._parse_csv(self._parse_bitwise) 5562 this.set("start", seq_get(args, 0)) 5563 this.set("increment", seq_get(args, 1)) 5564 5565 self._match_r_paren() 5566 5567 return this 5568 5569 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5570 self._match_text_seq("LENGTH") 5571 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5572 5573 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5574 if self._match_text_seq("NULL"): 5575 return self.expression(exp.NotNullColumnConstraint) 5576 if self._match_text_seq("CASESPECIFIC"): 5577 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5578 if self._match_text_seq("FOR", "REPLICATION"): 5579 return self.expression(exp.NotForReplicationColumnConstraint) 5580 5581 # Unconsume the `NOT` token 5582 self._retreat(self._index - 1) 5583 return None 5584 5585 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5586 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5587 5588 procedure_option_follows = ( 5589 self._match(TokenType.WITH, advance=False) 5590 and self._next 5591 and self._next.text.upper() in 
self.PROCEDURE_OPTIONS 5592 ) 5593 5594 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5595 return self.expression( 5596 exp.ColumnConstraint, 5597 this=this, 5598 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5599 ) 5600 5601 return this 5602 5603 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5604 if not self._match(TokenType.CONSTRAINT): 5605 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5606 5607 return self.expression( 5608 exp.Constraint, 5609 this=self._parse_id_var(), 5610 expressions=self._parse_unnamed_constraints(), 5611 ) 5612 5613 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5614 constraints = [] 5615 while True: 5616 constraint = self._parse_unnamed_constraint() or self._parse_function() 5617 if not constraint: 5618 break 5619 constraints.append(constraint) 5620 5621 return constraints 5622 5623 def _parse_unnamed_constraint( 5624 self, constraints: t.Optional[t.Collection[str]] = None 5625 ) -> t.Optional[exp.Expression]: 5626 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5627 constraints or self.CONSTRAINT_PARSERS 5628 ): 5629 return None 5630 5631 constraint = self._prev.text.upper() 5632 if constraint not in self.CONSTRAINT_PARSERS: 5633 self.raise_error(f"No parser found for schema constraint {constraint}.") 5634 5635 return self.CONSTRAINT_PARSERS[constraint](self) 5636 5637 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5638 return self._parse_id_var(any_token=False) 5639 5640 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5641 self._match_text_seq("KEY") 5642 return self.expression( 5643 exp.UniqueColumnConstraint, 5644 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5645 this=self._parse_schema(self._parse_unique_key()), 5646 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5647 on_conflict=self._parse_on_conflict(), 5648 ) 5649 5650 def _parse_key_constraint_options(self) -> t.List[str]: 5651 options = [] 5652 while True: 5653 if not self._curr: 5654 break 5655 5656 if self._match(TokenType.ON): 5657 action = None 5658 on = self._advance_any() and self._prev.text 5659 5660 if self._match_text_seq("NO", "ACTION"): 5661 action = "NO ACTION" 5662 elif self._match_text_seq("CASCADE"): 5663 action = "CASCADE" 5664 elif self._match_text_seq("RESTRICT"): 5665 action = "RESTRICT" 5666 elif self._match_pair(TokenType.SET, TokenType.NULL): 5667 action = "SET NULL" 5668 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5669 action = "SET DEFAULT" 5670 else: 5671 self.raise_error("Invalid key constraint") 5672 5673 options.append(f"ON {on} {action}") 5674 else: 5675 var = self._parse_var_from_options( 5676 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5677 ) 5678 if not var: 5679 break 5680 options.append(var.name) 5681 5682 return options 5683 5684 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5685 if match and not self._match(TokenType.REFERENCES): 5686 return None 5687 5688 expressions = None 5689 this = self._parse_table(schema=True) 5690 options = self._parse_key_constraint_options() 5691 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5692 5693 def _parse_foreign_key(self) -> exp.ForeignKey: 5694 expressions = self._parse_wrapped_id_vars() 5695 reference = self._parse_references() 5696 options = {} 5697 5698 while self._match(TokenType.ON): 5699 if not 
self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5700 self.raise_error("Expected DELETE or UPDATE") 5701 5702 kind = self._prev.text.lower() 5703 5704 if self._match_text_seq("NO", "ACTION"): 5705 action = "NO ACTION" 5706 elif self._match(TokenType.SET): 5707 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5708 action = "SET " + self._prev.text.upper() 5709 else: 5710 self._advance() 5711 action = self._prev.text.upper() 5712 5713 options[kind] = action 5714 5715 return self.expression( 5716 exp.ForeignKey, 5717 expressions=expressions, 5718 reference=reference, 5719 **options, # type: ignore 5720 ) 5721 5722 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5723 return self._parse_field() 5724 5725 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5726 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5727 self._retreat(self._index - 1) 5728 return None 5729 5730 id_vars = self._parse_wrapped_id_vars() 5731 return self.expression( 5732 exp.PeriodForSystemTimeConstraint, 5733 this=seq_get(id_vars, 0), 5734 expression=seq_get(id_vars, 1), 5735 ) 5736 5737 def _parse_primary_key( 5738 self, wrapped_optional: bool = False, in_props: bool = False 5739 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5740 desc = ( 5741 self._match_set((TokenType.ASC, TokenType.DESC)) 5742 and self._prev.token_type == TokenType.DESC 5743 ) 5744 5745 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5746 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5747 5748 expressions = self._parse_wrapped_csv( 5749 self._parse_primary_key_part, optional=wrapped_optional 5750 ) 5751 options = self._parse_key_constraint_options() 5752 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5753 5754 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5755 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5756 5757 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5758 """ 5759 Parses a datetime column in ODBC format. We parse the column into the corresponding 5760 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly 5761 as we would for `DATE('yyyy-mm-dd')`.
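As an illustrative sketch: `{ts '2024-01-01 00:00:00'}` looks up "ts" in
ODBC_DATETIME_LITERALS, which maps it to `exp.Timestamp`, so the literal is
parsed as if it were `TIMESTAMP('2024-01-01 00:00:00')`.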
5762 5763 Reference: 5764 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5765 """ 5766 self._match(TokenType.VAR) 5767 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5768 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5769 if not self._match(TokenType.R_BRACE): 5770 self.raise_error("Expected }") 5771 return expression 5772 5773 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5774 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5775 return this 5776 5777 bracket_kind = self._prev.token_type 5778 if ( 5779 bracket_kind == TokenType.L_BRACE 5780 and self._curr 5781 and self._curr.token_type == TokenType.VAR 5782 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5783 ): 5784 return self._parse_odbc_datetime_literal() 5785 5786 expressions = self._parse_csv( 5787 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5788 ) 5789 5790 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5791 self.raise_error("Expected ]") 5792 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5793 self.raise_error("Expected }") 5794 5795 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5796 if bracket_kind == TokenType.L_BRACE: 5797 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5798 elif not this: 5799 this = build_array_constructor( 5800 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5801 ) 5802 else: 5803 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5804 if constructor_type: 5805 return build_array_constructor( 5806 constructor_type, 5807 args=expressions, 5808 bracket_kind=bracket_kind, 5809 dialect=self.dialect, 5810 ) 5811 5812 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5813 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5814 5815 self._add_comments(this) 5816 return self._parse_bracket(this) 5817 5818 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5819 if self._match(TokenType.COLON): 5820 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5821 return this 5822 5823 def _parse_case(self) -> t.Optional[exp.Expression]: 5824 ifs = [] 5825 default = None 5826 5827 comments = self._prev_comments 5828 expression = self._parse_assignment() 5829 5830 while self._match(TokenType.WHEN): 5831 this = self._parse_assignment() 5832 self._match(TokenType.THEN) 5833 then = self._parse_assignment() 5834 ifs.append(self.expression(exp.If, this=this, true=then)) 5835 5836 if self._match(TokenType.ELSE): 5837 default = self._parse_assignment() 5838 5839 if not self._match(TokenType.END): 5840 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5841 default = exp.column("interval") 5842 else: 5843 self.raise_error("Expected END after CASE", self._prev) 5844 5845 return self.expression( 5846 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5847 ) 5848 5849 def _parse_if(self) -> t.Optional[exp.Expression]: 5850 if self._match(TokenType.L_PAREN): 5851 args = self._parse_csv(self._parse_assignment) 5852 this = self.validate_expression(exp.If.from_arg_list(args), args) 5853 self._match_r_paren() 5854 else: 5855 index = self._index - 1 5856 5857 if self.NO_PAREN_IF_COMMANDS and index == 0: 5858 
return self._parse_as_command(self._prev) 5859 5860 condition = self._parse_assignment() 5861 5862 if not condition: 5863 self._retreat(index) 5864 return None 5865 5866 self._match(TokenType.THEN) 5867 true = self._parse_assignment() 5868 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5869 self._match(TokenType.END) 5870 this = self.expression(exp.If, this=condition, true=true, false=false) 5871 5872 return this 5873 5874 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5875 if not self._match_text_seq("VALUE", "FOR"): 5876 self._retreat(self._index - 1) 5877 return None 5878 5879 return self.expression( 5880 exp.NextValueFor, 5881 this=self._parse_column(), 5882 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5883 ) 5884 5885 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5886 this = self._parse_function() or self._parse_var_or_string(upper=True) 5887 5888 if self._match(TokenType.FROM): 5889 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5890 5891 if not self._match(TokenType.COMMA): 5892 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5893 5894 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5895 5896 def _parse_gap_fill(self) -> exp.GapFill: 5897 self._match(TokenType.TABLE) 5898 this = self._parse_table() 5899 5900 self._match(TokenType.COMMA) 5901 args = [this, *self._parse_csv(self._parse_lambda)] 5902 5903 gap_fill = exp.GapFill.from_arg_list(args) 5904 return self.validate_expression(gap_fill, args) 5905 5906 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5907 this = self._parse_assignment() 5908 5909 if not self._match(TokenType.ALIAS): 5910 if self._match(TokenType.COMMA): 5911 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5912 5913 self.raise_error("Expected AS after CAST") 5914 5915 fmt = None 5916 to = self._parse_types() 5917 5918 if self._match(TokenType.FORMAT): 5919 fmt_string = self._parse_string() 5920 fmt = self._parse_at_time_zone(fmt_string) 5921 5922 if not to: 5923 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5924 if to.this in exp.DataType.TEMPORAL_TYPES: 5925 this = self.expression( 5926 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5927 this=this, 5928 format=exp.Literal.string( 5929 format_time( 5930 fmt_string.this if fmt_string else "", 5931 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5932 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5933 ) 5934 ), 5935 safe=safe, 5936 ) 5937 5938 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5939 this.set("zone", fmt.args["zone"]) 5940 return this 5941 elif not to: 5942 self.raise_error("Expected TYPE after CAST") 5943 elif isinstance(to, exp.Identifier): 5944 to = exp.DataType.build(to.name, udt=True) 5945 elif to.this == exp.DataType.Type.CHAR: 5946 if self._match(TokenType.CHARACTER_SET): 5947 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5948 5949 return self.expression( 5950 exp.Cast if strict else exp.TryCast, 5951 this=this, 5952 to=to, 5953 format=fmt, 5954 safe=safe, 5955 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5956 ) 5957 5958 def _parse_string_agg(self) -> exp.GroupConcat: 5959 if self._match(TokenType.DISTINCT): 5960 args: t.List[t.Optional[exp.Expression]] = [ 5961 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5962 ] 5963 if self._match(TokenType.COMMA): 5964 args.extend(self._parse_csv(self._parse_assignment)) 5965 else: 5966 args = self._parse_csv(self._parse_assignment) # type: ignore 5967 5968 if self._match_text_seq("ON", "OVERFLOW"): 5969 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 5970 if self._match_text_seq("ERROR"): 5971 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 5972 else: 5973 self._match_text_seq("TRUNCATE") 5974 on_overflow = self.expression( 5975 exp.OverflowTruncateBehavior, 5976 this=self._parse_string(), 5977 with_count=( 5978 self._match_text_seq("WITH", "COUNT") 5979 or not self._match_text_seq("WITHOUT", "COUNT") 5980 ), 5981 ) 5982 else: 5983 on_overflow = None 5984 5985 index = self._index 5986 if not self._match(TokenType.R_PAREN) and args: 5987 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5988 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5989 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5990 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5991 5992 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5993 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5994 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5995 if not self._match_text_seq("WITHIN", "GROUP"): 5996 self._retreat(index) 5997 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5998 5999 # The corresponding match_r_paren will be called in parse_function (caller) 6000 self._match_l_paren() 6001 6002 return self.expression( 6003 exp.GroupConcat, 6004 this=self._parse_order(this=seq_get(args, 0)), 6005 separator=seq_get(args, 1), 6006 on_overflow=on_overflow, 6007 ) 6008 6009 def _parse_convert( 6010 self, strict: bool, safe: t.Optional[bool] = None 6011 ) -> t.Optional[exp.Expression]: 6012 this = self._parse_bitwise() 6013 6014 if self._match(TokenType.USING): 6015 to: t.Optional[exp.Expression] = self.expression( 6016 exp.CharacterSet, this=self._parse_var() 6017 ) 6018 elif self._match(TokenType.COMMA): 6019 to = self._parse_types() 6020 else: 6021 to = None 6022 6023 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6024 6025 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6026 """ 6027 There are generally two variants of the DECODE function: 6028 6029 - DECODE(bin, charset) 6030 - DECODE(expression, search, result [, search, result] ... [, default]) 6031 6032 The second variant will always be parsed into a CASE expression. Note that NULL 6033 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6034 instead of relying on pattern matching. 
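As a sketch of the rewrite below: DECODE(x, 1, 'a', NULL, 'b', 'c') is parsed as
CASE WHEN x = 1 THEN 'a' WHEN x IS NULL THEN 'b' ELSE 'c' END.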
6035 """ 6036 args = self._parse_csv(self._parse_assignment) 6037 6038 if len(args) < 3: 6039 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6040 6041 expression, *expressions = args 6042 if not expression: 6043 return None 6044 6045 ifs = [] 6046 for search, result in zip(expressions[::2], expressions[1::2]): 6047 if not search or not result: 6048 return None 6049 6050 if isinstance(search, exp.Literal): 6051 ifs.append( 6052 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6053 ) 6054 elif isinstance(search, exp.Null): 6055 ifs.append( 6056 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6057 ) 6058 else: 6059 cond = exp.or_( 6060 exp.EQ(this=expression.copy(), expression=search), 6061 exp.and_( 6062 exp.Is(this=expression.copy(), expression=exp.Null()), 6063 exp.Is(this=search.copy(), expression=exp.Null()), 6064 copy=False, 6065 ), 6066 copy=False, 6067 ) 6068 ifs.append(exp.If(this=cond, true=result)) 6069 6070 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6071 6072 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6073 self._match_text_seq("KEY") 6074 key = self._parse_column() 6075 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6076 self._match_text_seq("VALUE") 6077 value = self._parse_bitwise() 6078 6079 if not key and not value: 6080 return None 6081 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6082 6083 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6084 if not this or not self._match_text_seq("FORMAT", "JSON"): 6085 return this 6086 6087 return self.expression(exp.FormatJson, this=this) 6088 6089 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6090 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6091 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6092 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6093 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6094 else: 6095 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6096 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6097 6098 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6099 6100 if not empty and not error and not null: 6101 return None 6102 6103 return self.expression( 6104 exp.OnCondition, 6105 empty=empty, 6106 error=error, 6107 null=null, 6108 ) 6109 6110 def _parse_on_handling( 6111 self, on: str, *values: str 6112 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6113 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6114 for value in values: 6115 if self._match_text_seq(value, "ON", on): 6116 return f"{value} ON {on}" 6117 6118 index = self._index 6119 if self._match(TokenType.DEFAULT): 6120 default_value = self._parse_bitwise() 6121 if self._match_text_seq("ON", on): 6122 return default_value 6123 6124 self._retreat(index) 6125 6126 return None 6127 6128 @t.overload 6129 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6130 6131 @t.overload 6132 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
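# As a sketch: JSON_OBJECT('a' VALUE 1 ABSENT ON NULL) is parsed into an
# exp.JSONObject whose expressions are exp.JSONKeyValue nodes and whose
# null_handling is "ABSENT ON NULL"; with agg=True the same logic yields
# exp.JSONObjectAgg.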
6133 6134 def _parse_json_object(self, agg=False): 6135 star = self._parse_star() 6136 expressions = ( 6137 [star] 6138 if star 6139 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6140 ) 6141 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6142 6143 unique_keys = None 6144 if self._match_text_seq("WITH", "UNIQUE"): 6145 unique_keys = True 6146 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6147 unique_keys = False 6148 6149 self._match_text_seq("KEYS") 6150 6151 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6152 self._parse_type() 6153 ) 6154 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6155 6156 return self.expression( 6157 exp.JSONObjectAgg if agg else exp.JSONObject, 6158 expressions=expressions, 6159 null_handling=null_handling, 6160 unique_keys=unique_keys, 6161 return_type=return_type, 6162 encoding=encoding, 6163 ) 6164 6165 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6166 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6167 if not self._match_text_seq("NESTED"): 6168 this = self._parse_id_var() 6169 kind = self._parse_types(allow_identifiers=False) 6170 nested = None 6171 else: 6172 this = None 6173 kind = None 6174 nested = True 6175 6176 path = self._match_text_seq("PATH") and self._parse_string() 6177 nested_schema = nested and self._parse_json_schema() 6178 6179 return self.expression( 6180 exp.JSONColumnDef, 6181 this=this, 6182 kind=kind, 6183 path=path, 6184 nested_schema=nested_schema, 6185 ) 6186 6187 def _parse_json_schema(self) -> exp.JSONSchema: 6188 self._match_text_seq("COLUMNS") 6189 return self.expression( 6190 exp.JSONSchema, 6191 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6192 ) 6193 6194 def _parse_json_table(self) -> exp.JSONTable: 6195 this = self._parse_format_json(self._parse_bitwise()) 6196 path = self._match(TokenType.COMMA) and self._parse_string() 6197 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6198 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6199 schema = self._parse_json_schema() 6200 6201 return exp.JSONTable( 6202 this=this, 6203 schema=schema, 6204 path=path, 6205 error_handling=error_handling, 6206 empty_handling=empty_handling, 6207 ) 6208 6209 def _parse_match_against(self) -> exp.MatchAgainst: 6210 expressions = self._parse_csv(self._parse_column) 6211 6212 self._match_text_seq(")", "AGAINST", "(") 6213 6214 this = self._parse_string() 6215 6216 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6217 modifier = "IN NATURAL LANGUAGE MODE" 6218 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6219 modifier = f"{modifier} WITH QUERY EXPANSION" 6220 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6221 modifier = "IN BOOLEAN MODE" 6222 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6223 modifier = "WITH QUERY EXPANSION" 6224 else: 6225 modifier = None 6226 6227 return self.expression( 6228 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6229 ) 6230 6231 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6232 def _parse_open_json(self) -> exp.OpenJSON: 6233 this = self._parse_bitwise() 6234 path = self._match(TokenType.COMMA) and self._parse_string() 6235 6236 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6237 this = self._parse_field(any_token=True) 6238 kind = self._parse_types() 6239 path = 
self._parse_string() 6240 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6241 6242 return self.expression( 6243 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6244 ) 6245 6246 expressions = None 6247 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6248 self._match_l_paren() 6249 expressions = self._parse_csv(_parse_open_json_column_def) 6250 6251 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6252 6253 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6254 args = self._parse_csv(self._parse_bitwise) 6255 6256 if self._match(TokenType.IN): 6257 return self.expression( 6258 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6259 ) 6260 6261 if haystack_first: 6262 haystack = seq_get(args, 0) 6263 needle = seq_get(args, 1) 6264 else: 6265 needle = seq_get(args, 0) 6266 haystack = seq_get(args, 1) 6267 6268 return self.expression( 6269 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6270 ) 6271 6272 def _parse_predict(self) -> exp.Predict: 6273 self._match_text_seq("MODEL") 6274 this = self._parse_table() 6275 6276 self._match(TokenType.COMMA) 6277 self._match_text_seq("TABLE") 6278 6279 return self.expression( 6280 exp.Predict, 6281 this=this, 6282 expression=self._parse_table(), 6283 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6284 ) 6285 6286 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6287 args = self._parse_csv(self._parse_table) 6288 return exp.JoinHint(this=func_name.upper(), expressions=args) 6289 6290 def _parse_substring(self) -> exp.Substring: 6291 # Postgres supports the form: substring(string [from int] [for int]) 6292 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6293 6294 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6295 6296 if self._match(TokenType.FROM): 6297 args.append(self._parse_bitwise()) 6298 if self._match(TokenType.FOR): 6299 if len(args) == 1: 6300 args.append(exp.Literal.number(1)) 6301 args.append(self._parse_bitwise()) 6302 6303 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6304 6305 def _parse_trim(self) -> exp.Trim: 6306 # https://www.w3resource.com/sql/character-functions/trim.php 6307 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6308 6309 position = None 6310 collation = None 6311 expression = None 6312 6313 if self._match_texts(self.TRIM_TYPES): 6314 position = self._prev.text.upper() 6315 6316 this = self._parse_bitwise() 6317 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6318 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6319 expression = self._parse_bitwise() 6320 6321 if invert_order: 6322 this, expression = expression, this 6323 6324 if self._match(TokenType.COLLATE): 6325 collation = self._parse_bitwise() 6326 6327 return self.expression( 6328 exp.Trim, this=this, position=position, expression=expression, collation=collation 6329 ) 6330 6331 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6332 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6333 6334 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6335 return self._parse_window(self._parse_id_var(), alias=True) 6336 6337 def _parse_respect_or_ignore_nulls( 6338 self, this: t.Optional[exp.Expression] 6339 ) -> t.Optional[exp.Expression]: 6340 if self._match_text_seq("IGNORE", "NULLS"): 
6341 return self.expression(exp.IgnoreNulls, this=this) 6342 if self._match_text_seq("RESPECT", "NULLS"): 6343 return self.expression(exp.RespectNulls, this=this) 6344 return this 6345 6346 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6347 if self._match(TokenType.HAVING): 6348 self._match_texts(("MAX", "MIN")) 6349 max = self._prev.text.upper() != "MIN" 6350 return self.expression( 6351 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6352 ) 6353 6354 return this 6355 6356 def _parse_window( 6357 self, this: t.Optional[exp.Expression], alias: bool = False 6358 ) -> t.Optional[exp.Expression]: 6359 func = this 6360 comments = func.comments if isinstance(func, exp.Expression) else None 6361 6362 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6363 self._match(TokenType.WHERE) 6364 this = self.expression( 6365 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6366 ) 6367 self._match_r_paren() 6368 6369 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6370 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6371 if self._match_text_seq("WITHIN", "GROUP"): 6372 order = self._parse_wrapped(self._parse_order) 6373 this = self.expression(exp.WithinGroup, this=this, expression=order) 6374 6375 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6376 # Some dialects choose to implement and some do not. 6377 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6378 6379 # There is some code above in _parse_lambda that handles 6380 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6381 6382 # The below changes handle 6383 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6384 6385 # Oracle allows both formats 6386 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6387 # and Snowflake chose to do the same for familiarity 6388 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6389 if isinstance(this, exp.AggFunc): 6390 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6391 6392 if ignore_respect and ignore_respect is not this: 6393 ignore_respect.replace(ignore_respect.this) 6394 this = self.expression(ignore_respect.__class__, this=this) 6395 6396 this = self._parse_respect_or_ignore_nulls(this) 6397 6398 # bigquery select from window x AS (partition by ...) 
6399 if alias: 6400 over = None 6401 self._match(TokenType.ALIAS) 6402 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6403 return this 6404 else: 6405 over = self._prev.text.upper() 6406 6407 if comments and isinstance(func, exp.Expression): 6408 func.pop_comments() 6409 6410 if not self._match(TokenType.L_PAREN): 6411 return self.expression( 6412 exp.Window, 6413 comments=comments, 6414 this=this, 6415 alias=self._parse_id_var(False), 6416 over=over, 6417 ) 6418 6419 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6420 6421 first = self._match(TokenType.FIRST) 6422 if self._match_text_seq("LAST"): 6423 first = False 6424 6425 partition, order = self._parse_partition_and_order() 6426 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6427 6428 if kind: 6429 self._match(TokenType.BETWEEN) 6430 start = self._parse_window_spec() 6431 self._match(TokenType.AND) 6432 end = self._parse_window_spec() 6433 6434 spec = self.expression( 6435 exp.WindowSpec, 6436 kind=kind, 6437 start=start["value"], 6438 start_side=start["side"], 6439 end=end["value"], 6440 end_side=end["side"], 6441 ) 6442 else: 6443 spec = None 6444 6445 self._match_r_paren() 6446 6447 window = self.expression( 6448 exp.Window, 6449 comments=comments, 6450 this=this, 6451 partition_by=partition, 6452 order=order, 6453 spec=spec, 6454 alias=window_alias, 6455 over=over, 6456 first=first, 6457 ) 6458 6459 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6460 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6461 return self._parse_window(window, alias=alias) 6462 6463 return window 6464 6465 def _parse_partition_and_order( 6466 self, 6467 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6468 return self._parse_partition_by(), self._parse_order() 6469 6470 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6471 self._match(TokenType.BETWEEN) 6472 6473 return { 6474 "value": ( 6475 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6476 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6477 or self._parse_bitwise() 6478 ), 6479 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6480 } 6481 6482 def _parse_alias( 6483 self, this: t.Optional[exp.Expression], explicit: bool = False 6484 ) -> t.Optional[exp.Expression]: 6485 any_token = self._match(TokenType.ALIAS) 6486 comments = self._prev_comments or [] 6487 6488 if explicit and not any_token: 6489 return this 6490 6491 if self._match(TokenType.L_PAREN): 6492 aliases = self.expression( 6493 exp.Aliases, 6494 comments=comments, 6495 this=this, 6496 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6497 ) 6498 self._match_r_paren(aliases) 6499 return aliases 6500 6501 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6502 self.STRING_ALIASES and self._parse_string_as_identifier() 6503 ) 6504 6505 if alias: 6506 comments.extend(alias.pop_comments()) 6507 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6508 column = this.this 6509 6510 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6511 if not this.comments and column and column.comments: 6512 this.comments = column.pop_comments() 6513 6514 return this 6515 6516 def _parse_id_var( 6517 self, 6518 any_token: bool = True, 6519 tokens: t.Optional[t.Collection[TokenType]] = None, 6520 ) -> t.Optional[exp.Expression]: 6521 expression = self._parse_identifier() 6522 if 
not expression and ( 6523 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6524 ): 6525 quoted = self._prev.token_type == TokenType.STRING 6526 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6527 6528 return expression 6529 6530 def _parse_string(self) -> t.Optional[exp.Expression]: 6531 if self._match_set(self.STRING_PARSERS): 6532 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6533 return self._parse_placeholder() 6534 6535 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6536 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6537 6538 def _parse_number(self) -> t.Optional[exp.Expression]: 6539 if self._match_set(self.NUMERIC_PARSERS): 6540 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6541 return self._parse_placeholder() 6542 6543 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6544 if self._match(TokenType.IDENTIFIER): 6545 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6546 return self._parse_placeholder() 6547 6548 def _parse_var( 6549 self, 6550 any_token: bool = False, 6551 tokens: t.Optional[t.Collection[TokenType]] = None, 6552 upper: bool = False, 6553 ) -> t.Optional[exp.Expression]: 6554 if ( 6555 (any_token and self._advance_any()) 6556 or self._match(TokenType.VAR) 6557 or (self._match_set(tokens) if tokens else False) 6558 ): 6559 return self.expression( 6560 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6561 ) 6562 return self._parse_placeholder() 6563 6564 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6565 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6566 self._advance() 6567 return self._prev 6568 return None 6569 6570 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6571 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6572 6573 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6574 return self._parse_primary() or self._parse_var(any_token=True) 6575 6576 def _parse_null(self) -> t.Optional[exp.Expression]: 6577 if self._match_set(self.NULL_TOKENS): 6578 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6579 return self._parse_placeholder() 6580 6581 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6582 if self._match(TokenType.TRUE): 6583 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6584 if self._match(TokenType.FALSE): 6585 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6586 return self._parse_placeholder() 6587 6588 def _parse_star(self) -> t.Optional[exp.Expression]: 6589 if self._match(TokenType.STAR): 6590 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6591 return self._parse_placeholder() 6592 6593 def _parse_parameter(self) -> exp.Parameter: 6594 this = self._parse_identifier() or self._parse_primary_or_var() 6595 return self.expression(exp.Parameter, this=this) 6596 6597 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6598 if self._match_set(self.PLACEHOLDER_PARSERS): 6599 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6600 if placeholder: 6601 return placeholder 6602 self._advance(-1) 6603 return None 6604 6605 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6606 if not self._match_texts(keywords): 6607 return None 6608 if self._match(TokenType.L_PAREN, 
advance=False): 6609 return self._parse_wrapped_csv(self._parse_expression) 6610 6611 expression = self._parse_expression() 6612 return [expression] if expression else None 6613 6614 def _parse_csv( 6615 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6616 ) -> t.List[exp.Expression]: 6617 parse_result = parse_method() 6618 items = [parse_result] if parse_result is not None else [] 6619 6620 while self._match(sep): 6621 self._add_comments(parse_result) 6622 parse_result = parse_method() 6623 if parse_result is not None: 6624 items.append(parse_result) 6625 6626 return items 6627 6628 def _parse_tokens( 6629 self, parse_method: t.Callable, expressions: t.Dict 6630 ) -> t.Optional[exp.Expression]: 6631 this = parse_method() 6632 6633 while self._match_set(expressions): 6634 this = self.expression( 6635 expressions[self._prev.token_type], 6636 this=this, 6637 comments=self._prev_comments, 6638 expression=parse_method(), 6639 ) 6640 6641 return this 6642 6643 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6644 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6645 6646 def _parse_wrapped_csv( 6647 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6648 ) -> t.List[exp.Expression]: 6649 return self._parse_wrapped( 6650 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6651 ) 6652 6653 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6654 wrapped = self._match(TokenType.L_PAREN) 6655 if not wrapped and not optional: 6656 self.raise_error("Expecting (") 6657 parse_result = parse_method() 6658 if wrapped: 6659 self._match_r_paren() 6660 return parse_result 6661 6662 def _parse_expressions(self) -> t.List[exp.Expression]: 6663 return self._parse_csv(self._parse_expression) 6664 6665 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6666 return self._parse_select() or self._parse_set_operations( 6667 self._parse_expression() if alias else self._parse_assignment() 6668 ) 6669 6670 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6671 return self._parse_query_modifiers( 6672 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6673 ) 6674 6675 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6676 this = None 6677 if self._match_texts(self.TRANSACTION_KIND): 6678 this = self._prev.text 6679 6680 self._match_texts(("TRANSACTION", "WORK")) 6681 6682 modes = [] 6683 while True: 6684 mode = [] 6685 while self._match(TokenType.VAR): 6686 mode.append(self._prev.text) 6687 6688 if mode: 6689 modes.append(" ".join(mode)) 6690 if not self._match(TokenType.COMMA): 6691 break 6692 6693 return self.expression(exp.Transaction, this=this, modes=modes) 6694 6695 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6696 chain = None 6697 savepoint = None 6698 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6699 6700 self._match_texts(("TRANSACTION", "WORK")) 6701 6702 if self._match_text_seq("TO"): 6703 self._match_text_seq("SAVEPOINT") 6704 savepoint = self._parse_id_var() 6705 6706 if self._match(TokenType.AND): 6707 chain = not self._match_text_seq("NO") 6708 self._match_text_seq("CHAIN") 6709 6710 if is_rollback: 6711 return self.expression(exp.Rollback, savepoint=savepoint) 6712 6713 return self.expression(exp.Commit, chain=chain) 6714 6715 def _parse_refresh(self) -> exp.Refresh: 6716 self._match(TokenType.TABLE) 6717 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6718 6719 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6720 if not self._match_text_seq("ADD"): 6721 return None 6722 6723 self._match(TokenType.COLUMN) 6724 exists_column = self._parse_exists(not_=True) 6725 expression = self._parse_field_def() 6726 6727 if expression: 6728 expression.set("exists", exists_column) 6729 6730 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6731 if self._match_texts(("FIRST", "AFTER")): 6732 position = self._prev.text 6733 column_position = self.expression( 6734 exp.ColumnPosition, this=self._parse_column(), position=position 6735 ) 6736 expression.set("position", column_position) 6737 6738 return expression 6739 6740 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6741 drop = self._match(TokenType.DROP) and self._parse_drop() 6742 if drop and not isinstance(drop, exp.Command): 6743 drop.set("kind", drop.args.get("kind", "COLUMN")) 6744 return drop 6745 6746 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6747 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6748 return self.expression( 6749 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6750 ) 6751 6752 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6753 index = self._index - 1 6754 6755 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6756 return self._parse_csv( 6757 lambda: self.expression( 6758 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6759 ) 6760 ) 6761 6762 self._retreat(index) 6763 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6764 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6765 6766 if self._match_text_seq("ADD", "COLUMNS"): 6767 schema = self._parse_schema() 6768 if schema: 6769 return [schema] 6770 return [] 6771 6772 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6773 6774 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6775 if self._match_texts(self.ALTER_ALTER_PARSERS): 6776 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6777 6778 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6779 # keyword after ALTER we default to parsing this statement 6780 self._match(TokenType.COLUMN) 6781 column = self._parse_field(any_token=True) 6782 6783 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6784 return self.expression(exp.AlterColumn, this=column, drop=True) 6785 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6786 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6787 if self._match(TokenType.COMMENT): 6788 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6789 if self._match_text_seq("DROP", "NOT", "NULL"): 6790 return self.expression( 6791 exp.AlterColumn, 6792 this=column, 6793 drop=True, 6794 allow_null=True, 6795 ) 6796 if self._match_text_seq("SET", "NOT", "NULL"): 6797 return self.expression( 6798 exp.AlterColumn, 6799 this=column, 6800 allow_null=False, 6801 ) 6802 self._match_text_seq("SET", "DATA") 6803 self._match_text_seq("TYPE") 6804 return self.expression( 6805 exp.AlterColumn, 6806 this=column, 6807 dtype=self._parse_types(), 6808 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6809 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6810 ) 6811 6812 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6813 if self._match_texts(("ALL", "EVEN", "AUTO")): 6814 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6815 6816 self._match_text_seq("KEY", "DISTKEY") 6817 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6818 6819 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6820 if compound: 6821 self._match_text_seq("SORTKEY") 6822 6823 if self._match(TokenType.L_PAREN, advance=False): 6824 return self.expression( 6825 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6826 ) 6827 6828 self._match_texts(("AUTO", "NONE")) 6829 return self.expression( 6830 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6831 ) 6832 6833 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6834 index = self._index - 1 6835 6836 partition_exists = self._parse_exists() 6837 if self._match(TokenType.PARTITION, advance=False): 6838 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6839 6840 self._retreat(index) 6841 return self._parse_csv(self._parse_drop_column) 6842 6843 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6844 if self._match(TokenType.COLUMN): 6845 exists = self._parse_exists() 6846 old_column = self._parse_column() 6847 to = self._match_text_seq("TO") 6848 new_column = self._parse_column() 6849 6850 if old_column is None or to is None or new_column is None: 6851 return None 6852 6853 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6854 6855 self._match_text_seq("TO") 6856 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6857 6858 def _parse_alter_table_set(self) -> exp.AlterSet: 6859 alter_set = self.expression(exp.AlterSet) 6860 6861 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6862 "TABLE", "PROPERTIES" 6863 ): 6864 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6865 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6866 alter_set.set("expressions", [self._parse_assignment()]) 6867 elif self._match_texts(("LOGGED", "UNLOGGED")): 6868 alter_set.set("option", exp.var(self._prev.text.upper())) 6869 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6870 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6871 elif self._match_text_seq("LOCATION"): 6872 alter_set.set("location", self._parse_field()) 6873 elif self._match_text_seq("ACCESS", "METHOD"): 6874 alter_set.set("access_method", self._parse_field()) 6875 elif self._match_text_seq("TABLESPACE"): 6876 alter_set.set("tablespace", self._parse_field()) 6877 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6878 alter_set.set("file_format", [self._parse_field()]) 6879 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6880 alter_set.set("file_format", self._parse_wrapped_options()) 6881 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6882 alter_set.set("copy_options", self._parse_wrapped_options()) 6883 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6884 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6885 else: 6886 if self._match_text_seq("SERDE"): 6887 alter_set.set("serde", self._parse_field()) 6888 6889 alter_set.set("expressions", [self._parse_properties()]) 6890 6891 return 
alter_set 6892 6893 def _parse_alter(self) -> exp.Alter | exp.Command: 6894 start = self._prev 6895 6896 alter_token = self._match_set(self.ALTERABLES) and self._prev 6897 if not alter_token: 6898 return self._parse_as_command(start) 6899 6900 exists = self._parse_exists() 6901 only = self._match_text_seq("ONLY") 6902 this = self._parse_table(schema=True) 6903 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6904 6905 if self._next: 6906 self._advance() 6907 6908 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6909 if parser: 6910 actions = ensure_list(parser(self)) 6911 not_valid = self._match_text_seq("NOT", "VALID") 6912 options = self._parse_csv(self._parse_property) 6913 6914 if not self._curr and actions: 6915 return self.expression( 6916 exp.Alter, 6917 this=this, 6918 kind=alter_token.text.upper(), 6919 exists=exists, 6920 actions=actions, 6921 only=only, 6922 options=options, 6923 cluster=cluster, 6924 not_valid=not_valid, 6925 ) 6926 6927 return self._parse_as_command(start) 6928 6929 def _parse_merge(self) -> exp.Merge: 6930 self._match(TokenType.INTO) 6931 target = self._parse_table() 6932 6933 if target and self._match(TokenType.ALIAS, advance=False): 6934 target.set("alias", self._parse_table_alias()) 6935 6936 self._match(TokenType.USING) 6937 using = self._parse_table() 6938 6939 self._match(TokenType.ON) 6940 on = self._parse_assignment() 6941 6942 return self.expression( 6943 exp.Merge, 6944 this=target, 6945 using=using, 6946 on=on, 6947 expressions=self._parse_when_matched(), 6948 returning=self._parse_returning(), 6949 ) 6950 6951 def _parse_when_matched(self) -> t.List[exp.When]: 6952 whens = [] 6953 6954 while self._match(TokenType.WHEN): 6955 matched = not self._match(TokenType.NOT) 6956 self._match_text_seq("MATCHED") 6957 source = ( 6958 False 6959 if self._match_text_seq("BY", "TARGET") 6960 else self._match_text_seq("BY", "SOURCE") 6961 ) 6962 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6963 6964 self._match(TokenType.THEN) 6965 6966 if self._match(TokenType.INSERT): 6967 this = self._parse_star() 6968 if this: 6969 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6970 else: 6971 then = self.expression( 6972 exp.Insert, 6973 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6974 expression=self._match_text_seq("VALUES") and self._parse_value(), 6975 ) 6976 elif self._match(TokenType.UPDATE): 6977 expressions = self._parse_star() 6978 if expressions: 6979 then = self.expression(exp.Update, expressions=expressions) 6980 else: 6981 then = self.expression( 6982 exp.Update, 6983 expressions=self._match(TokenType.SET) 6984 and self._parse_csv(self._parse_equality), 6985 ) 6986 elif self._match(TokenType.DELETE): 6987 then = self.expression(exp.Var, this=self._prev.text) 6988 else: 6989 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6990 6991 whens.append( 6992 self.expression( 6993 exp.When, 6994 matched=matched, 6995 source=source, 6996 condition=condition, 6997 then=then, 6998 ) 6999 ) 7000 return whens 7001 7002 def _parse_show(self) -> t.Optional[exp.Expression]: 7003 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7004 if parser: 7005 return parser(self) 7006 return self._parse_as_command(self._prev) 7007 7008 def _parse_set_item_assignment( 7009 self, kind: t.Optional[str] = None 7010 ) -> t.Optional[exp.Expression]: 7011 index = self._index 7012 7013 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 7014 return self._parse_set_transaction(global_=kind == "GLOBAL") 7015 7016 left = self._parse_primary() or self._parse_column() 7017 assignment_delimiter = self._match_texts(("=", "TO")) 7018 7019 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7020 self._retreat(index) 7021 return None 7022 7023 right = self._parse_statement() or self._parse_id_var() 7024 if isinstance(right, (exp.Column, exp.Identifier)): 7025 right = exp.var(right.name) 7026 7027 this = self.expression(exp.EQ, this=left, expression=right) 7028 return self.expression(exp.SetItem, this=this, kind=kind) 7029 7030 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7031 self._match_text_seq("TRANSACTION") 7032 characteristics = self._parse_csv( 7033 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7034 ) 7035 return self.expression( 7036 exp.SetItem, 7037 expressions=characteristics, 7038 kind="TRANSACTION", 7039 **{"global": global_}, # type: ignore 7040 ) 7041 7042 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7043 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7044 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7045 7046 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7047 index = self._index 7048 set_ = self.expression( 7049 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7050 ) 7051 7052 if self._curr: 7053 self._retreat(index) 7054 return self._parse_as_command(self._prev) 7055 7056 return set_ 7057 7058 def _parse_var_from_options( 7059 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7060 ) -> t.Optional[exp.Var]: 7061 start = self._curr 7062 if not start: 7063 return None 7064 7065 option = start.text.upper() 7066 continuations = options.get(option) 7067 7068 index = self._index 7069 self._advance() 7070 for keywords in continuations or []: 7071 if isinstance(keywords, str): 7072 keywords = (keywords,) 7073 7074 if self._match_text_seq(*keywords): 7075 option = f"{option} {' '.join(keywords)}" 7076 break 7077 else: 7078 if continuations or continuations is None: 7079 if raise_unmatched: 7080 self.raise_error(f"Unknown option {option}") 7081 7082 self._retreat(index) 7083 return None 7084 7085 return exp.var(option) 7086 7087 def _parse_as_command(self, start: Token) -> exp.Command: 7088 while self._curr: 7089 self._advance() 7090 text = self._find_sql(start, self._prev) 7091 size = len(start.text) 7092 self._warn_unsupported() 7093 return exp.Command(this=text[:size], expression=text[size:]) 7094 7095 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7096 settings = [] 7097 7098 self._match_l_paren() 7099 kind = self._parse_id_var() 7100 7101 if self._match(TokenType.L_PAREN): 7102 while True: 7103 key = self._parse_id_var() 7104 value = self._parse_primary() 7105 7106 if not key and value is None: 7107 break 7108 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7109 self._match(TokenType.R_PAREN) 7110 7111 self._match_r_paren() 7112 7113 return self.expression( 7114 exp.DictProperty, 7115 this=this, 7116 kind=kind.this if kind else None, 7117 settings=settings, 7118 ) 7119 7120 def _parse_dict_range(self, this: str) -> exp.DictRange: 7121 self._match_l_paren() 7122 has_min = self._match_text_seq("MIN") 7123 if has_min: 7124 min = self._parse_var() or self._parse_primary() 7125 self._match_text_seq("MAX") 7126 max = 
self._parse_var() or self._parse_primary() 7127 else: 7128 max = self._parse_var() or self._parse_primary() 7129 min = exp.Literal.number(0) 7130 self._match_r_paren() 7131 return self.expression(exp.DictRange, this=this, min=min, max=max) 7132 7133 def _parse_comprehension( 7134 self, this: t.Optional[exp.Expression] 7135 ) -> t.Optional[exp.Comprehension]: 7136 index = self._index 7137 expression = self._parse_column() 7138 if not self._match(TokenType.IN): 7139 self._retreat(index - 1) 7140 return None 7141 iterator = self._parse_column() 7142 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7143 return self.expression( 7144 exp.Comprehension, 7145 this=this, 7146 expression=expression, 7147 iterator=iterator, 7148 condition=condition, 7149 ) 7150 7151 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7152 if self._match(TokenType.HEREDOC_STRING): 7153 return self.expression(exp.Heredoc, this=self._prev.text) 7154 7155 if not self._match_text_seq("$"): 7156 return None 7157 7158 tags = ["$"] 7159 tag_text = None 7160 7161 if self._is_connected(): 7162 self._advance() 7163 tags.append(self._prev.text.upper()) 7164 else: 7165 self.raise_error("No closing $ found") 7166 7167 if tags[-1] != "$": 7168 if self._is_connected() and self._match_text_seq("$"): 7169 tag_text = tags[-1] 7170 tags.append("$") 7171 else: 7172 self.raise_error("No closing $ found") 7173 7174 heredoc_start = self._curr 7175 7176 while self._curr: 7177 if self._match_text_seq(*tags, advance=False): 7178 this = self._find_sql(heredoc_start, self._prev) 7179 self._advance(len(tags)) 7180 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7181 7182 self._advance() 7183 7184 self.raise_error(f"No closing {''.join(tags)} found") 7185 return None 7186 7187 def _find_parser( 7188 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7189 ) -> t.Optional[t.Callable]: 7190 if not self._curr: 7191 return None 7192 7193 index = self._index 7194 this = [] 7195 while True: 7196 # The current token might be multiple words 7197 curr = self._curr.text.upper() 7198 key = curr.split(" ") 7199 this.append(curr) 7200 7201 self._advance() 7202 result, trie = in_trie(trie, key) 7203 if result == TrieResult.FAILED: 7204 break 7205 7206 if result == TrieResult.EXISTS: 7207 subparser = parsers[" ".join(this)] 7208 return subparser 7209 7210 self._retreat(index) 7211 return None 7212 7213 def _match(self, token_type, advance=True, expression=None): 7214 if not self._curr: 7215 return None 7216 7217 if self._curr.token_type == token_type: 7218 if advance: 7219 self._advance() 7220 self._add_comments(expression) 7221 return True 7222 7223 return None 7224 7225 def _match_set(self, types, advance=True): 7226 if not self._curr: 7227 return None 7228 7229 if self._curr.token_type in types: 7230 if advance: 7231 self._advance() 7232 return True 7233 7234 return None 7235 7236 def _match_pair(self, token_type_a, token_type_b, advance=True): 7237 if not self._curr or not self._next: 7238 return None 7239 7240 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7241 if advance: 7242 self._advance(2) 7243 return True 7244 7245 return None 7246 7247 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7248 if not self._match(TokenType.L_PAREN, expression=expression): 7249 self.raise_error("Expecting (") 7250 7251 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7252 if not self._match(TokenType.R_PAREN, expression=expression): 
7253 self.raise_error("Expecting )") 7254 7255 def _match_texts(self, texts, advance=True): 7256 if ( 7257 self._curr 7258 and self._curr.token_type != TokenType.STRING 7259 and self._curr.text.upper() in texts 7260 ): 7261 if advance: 7262 self._advance() 7263 return True 7264 return None 7265 7266 def _match_text_seq(self, *texts, advance=True): 7267 index = self._index 7268 for text in texts: 7269 if ( 7270 self._curr 7271 and self._curr.token_type != TokenType.STRING 7272 and self._curr.text.upper() == text 7273 ): 7274 self._advance() 7275 else: 7276 self._retreat(index) 7277 return None 7278 7279 if not advance: 7280 self._retreat(index) 7281 7282 return True 7283 7284 def _replace_lambda( 7285 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7286 ) -> t.Optional[exp.Expression]: 7287 if not node: 7288 return node 7289 7290 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7291 7292 for column in node.find_all(exp.Column): 7293 typ = lambda_types.get(column.parts[0].name) 7294 if typ is not None: 7295 dot_or_id = column.to_dot() if column.table else column.this 7296 7297 if typ: 7298 dot_or_id = self.expression( 7299 exp.Cast, 7300 this=dot_or_id, 7301 to=typ, 7302 ) 7303 7304 parent = column.parent 7305 7306 while isinstance(parent, exp.Dot): 7307 if not isinstance(parent.parent, exp.Dot): 7308 parent.replace(dot_or_id) 7309 break 7310 parent = parent.parent 7311 else: 7312 if column is node: 7313 node = dot_or_id 7314 else: 7315 column.replace(dot_or_id) 7316 return node 7317 7318 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7319 start = self._prev 7320 7321 # Not to be confused with TRUNCATE(number, decimals) function call 7322 if self._match(TokenType.L_PAREN): 7323 self._retreat(self._index - 2) 7324 return self._parse_function() 7325 7326 # Clickhouse supports TRUNCATE DATABASE as well 7327 is_database = self._match(TokenType.DATABASE) 7328 7329 self._match(TokenType.TABLE) 7330 7331 exists = self._parse_exists(not_=False) 7332 7333 expressions = self._parse_csv( 7334 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7335 ) 7336 7337 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7338 7339 if self._match_text_seq("RESTART", "IDENTITY"): 7340 identity = "RESTART" 7341 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7342 identity = "CONTINUE" 7343 else: 7344 identity = None 7345 7346 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7347 option = self._prev.text 7348 else: 7349 option = None 7350 7351 partition = self._parse_partition() 7352 7353 # Fallback case 7354 if self._curr: 7355 return self._parse_as_command(start) 7356 7357 return self.expression( 7358 exp.TruncateTable, 7359 expressions=expressions, 7360 is_database=is_database, 7361 exists=exists, 7362 cluster=cluster, 7363 identity=identity, 7364 option=option, 7365 partition=partition, 7366 ) 7367 7368 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7369 this = self._parse_ordered(self._parse_opclass) 7370 7371 if not self._match(TokenType.WITH): 7372 return this 7373 7374 op = self._parse_var(any_token=True) 7375 7376 return self.expression(exp.WithOperator, this=this, op=op) 7377 7378 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7379 self._match(TokenType.EQ) 7380 self._match(TokenType.L_PAREN) 7381 7382 opts: t.List[t.Optional[exp.Expression]] = [] 7383 while self._curr and not self._match(TokenType.R_PAREN): 7384 if 
self._match_text_seq("FORMAT_NAME", "="): 7385 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7386 # so we parse it separately to use _parse_field() 7387 prop = self.expression( 7388 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7389 ) 7390 opts.append(prop) 7391 else: 7392 opts.append(self._parse_property()) 7393 7394 self._match(TokenType.COMMA) 7395 7396 return opts 7397 7398 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7399 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7400 7401 options = [] 7402 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7403 option = self._parse_var(any_token=True) 7404 prev = self._prev.text.upper() 7405 7406 # Different dialects might separate options and values by white space, "=" and "AS" 7407 self._match(TokenType.EQ) 7408 self._match(TokenType.ALIAS) 7409 7410 param = self.expression(exp.CopyParameter, this=option) 7411 7412 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7413 TokenType.L_PAREN, advance=False 7414 ): 7415 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7416 param.set("expressions", self._parse_wrapped_options()) 7417 elif prev == "FILE_FORMAT": 7418 # T-SQL's external file format case 7419 param.set("expression", self._parse_field()) 7420 else: 7421 param.set("expression", self._parse_unquoted_field()) 7422 7423 options.append(param) 7424 self._match(sep) 7425 7426 return options 7427 7428 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7429 expr = self.expression(exp.Credentials) 7430 7431 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7432 expr.set("storage", self._parse_field()) 7433 if self._match_text_seq("CREDENTIALS"): 7434 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7435 creds = ( 7436 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7437 ) 7438 expr.set("credentials", creds) 7439 if self._match_text_seq("ENCRYPTION"): 7440 expr.set("encryption", self._parse_wrapped_options()) 7441 if self._match_text_seq("IAM_ROLE"): 7442 expr.set("iam_role", self._parse_field()) 7443 if self._match_text_seq("REGION"): 7444 expr.set("region", self._parse_field()) 7445 7446 return expr 7447 7448 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7449 return self._parse_field() 7450 7451 def _parse_copy(self) -> exp.Copy | exp.Command: 7452 start = self._prev 7453 7454 self._match(TokenType.INTO) 7455 7456 this = ( 7457 self._parse_select(nested=True, parse_subquery_alias=False) 7458 if self._match(TokenType.L_PAREN, advance=False) 7459 else self._parse_table(schema=True) 7460 ) 7461 7462 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7463 7464 files = self._parse_csv(self._parse_file_location) 7465 credentials = self._parse_credentials() 7466 7467 self._match_text_seq("WITH") 7468 7469 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7470 7471 # Fallback case 7472 if self._curr: 7473 return self._parse_as_command(start) 7474 7475 return self.expression( 7476 exp.Copy, 7477 this=this, 7478 kind=kind, 7479 credentials=credentials, 7480 files=files, 7481 params=params, 7482 ) 7483 7484 def _parse_normalize(self) -> exp.Normalize: 7485 return self.expression( 7486 exp.Normalize, 7487 this=self._parse_bitwise(), 7488 form=self._match(TokenType.COMMA) and self._parse_var(), 7489 ) 7490 7491 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7492 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7493 this = self._parse_function() 7494 if isinstance(this, exp.Columns): 7495 this.set("unpack", True) 7496 return this 7497 7498 return self.expression( 7499 exp.Star, 7500 **{ # type: ignore 7501 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7502 "replace": self._parse_star_op("REPLACE"), 7503 "rename": self._parse_star_op("RENAME"), 7504 }, 7505 ) 7506 7507 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7508 privilege_parts = [] 7509 7510 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7511 # (end of privilege list) or L_PAREN (start of column list) are met 7512 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7513 privilege_parts.append(self._curr.text.upper()) 7514 self._advance() 7515 7516 this = exp.var(" ".join(privilege_parts)) 7517 expressions = ( 7518 self._parse_wrapped_csv(self._parse_column) 7519 if self._match(TokenType.L_PAREN, advance=False) 7520 else None 7521 ) 7522 7523 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7524 7525 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7526 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7527 principal = self._parse_id_var() 7528 7529 if not principal: 7530 return None 7531 7532 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7533 7534 def _parse_grant(self) -> exp.Grant | exp.Command: 7535 start = self._prev 7536 7537 privileges = self._parse_csv(self._parse_grant_privilege) 7538 7539 self._match(TokenType.ON) 7540 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7541 7542 # Attempt to parse the securable e.g. MySQL allows names 7543 # such as "foo.*", "*.*" which are not easily parseable yet 7544 securable = self._try_parse(self._parse_table_parts) 7545 7546 if not securable or not self._match_text_seq("TO"): 7547 return self._parse_as_command(start) 7548 7549 principals = self._parse_csv(self._parse_grant_principal) 7550 7551 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7552 7553 if self._curr: 7554 return self._parse_as_command(start) 7555 7556 return self.expression( 7557 exp.Grant, 7558 privileges=privileges, 7559 kind=kind, 7560 securable=securable, 7561 principals=principals, 7562 grant_option=grant_option, 7563 ) 7564 7565 def _parse_overlay(self) -> exp.Overlay: 7566 return self.expression( 7567 exp.Overlay, 7568 **{ # type: ignore 7569 "this": self._parse_bitwise(), 7570 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7571 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7572 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7573 }, 7574 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1379 def __init__( 1380 self, 1381 error_level: t.Optional[ErrorLevel] = None, 1382 error_message_context: int = 100, 1383 max_errors: int = 3, 1384 dialect: DialectType = None, 1385 ): 1386 from sqlglot.dialects import Dialect 1387 1388 self.error_level = error_level or ErrorLevel.IMMEDIATE 1389 self.error_message_context = error_message_context 1390 self.max_errors = max_errors 1391 self.dialect = Dialect.get_or_raise(dialect) 1392 self.reset()
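A short construction sketch, assuming only the classes shown in this module. The dialect argument accepts a Dialect instance, a dialect name, or None, resolved through Dialect.get_or_raise.

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)

# Collect up to 5 errors before raising a single combined ParseError.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
expressions = parser.parse(tokens, sql)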
1404 def parse( 1405 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1406 ) -> t.List[t.Optional[exp.Expression]]: 1407 """ 1408 Parses a list of tokens and returns a list of syntax trees, one tree 1409 per parsed SQL statement. 1410 1411 Args: 1412 raw_tokens: The list of tokens. 1413 sql: The original SQL string, used to produce helpful debug messages. 1414 1415 Returns: 1416 The list of the produced syntax trees. 1417 """ 1418 return self._parse( 1419 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1420 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
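For example, parsing a two-statement script yields one tree per statement; a minimal sketch with the default dialect:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql)
assert len(trees) == 2  # one syntax tree per SQL statement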
1422 def parse_into( 1423 self, 1424 expression_types: exp.IntoType, 1425 raw_tokens: t.List[Token], 1426 sql: t.Optional[str] = None, 1427 ) -> t.List[t.Optional[exp.Expression]]: 1428 """ 1429 Parses a list of tokens into a given Expression type. If a collection of Expression 1430 types is given instead, this method will try to parse the token list into each one 1431 of them, stopping at the first for which the parsing succeeds. 1432 1433 Args: 1434 expression_types: The expression type(s) to try and parse the token list into. 1435 raw_tokens: The list of tokens. 1436 sql: The original SQL string, used to produce helpful debug messages. 1437 1438 Returns: 1439 The target Expression. 1440 """ 1441 errors = [] 1442 for expression_type in ensure_list(expression_types): 1443 parser = self.EXPRESSION_PARSERS.get(expression_type) 1444 if not parser: 1445 raise TypeError(f"No parser registered for {expression_type}") 1446 1447 try: 1448 return self._parse(parser, raw_tokens, sql) 1449 except ParseError as e: 1450 e.errors[0]["into_expression"] = expression_type 1451 errors.append(e) 1452 1453 raise ParseError( 1454 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1455 errors=merge_errors(errors), 1456 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
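A usage sketch, assuming exp.Select has a registered parser in EXPRESSION_PARSERS (it does in the base Parser). Passing an unregistered type raises TypeError, and a failed parse raises ParseError with into_expression recorded on each error, as the source above shows.

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
assert isinstance(select, exp.Select)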
1496 def check_errors(self) -> None: 1497 """Logs or raises any found errors, depending on the chosen error level setting.""" 1498 if self.error_level == ErrorLevel.WARN: 1499 for error in self.errors: 1500 logger.error(str(error)) 1501 elif self.error_level == ErrorLevel.RAISE and self.errors: 1502 raise ParseError( 1503 concat_messages(self.errors, self.max_errors), 1504 errors=merge_errors(self.errors), 1505 )
Logs or raises any found errors, depending on the chosen error level setting.
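Under ErrorLevel.WARN the parser accumulates errors, and check_errors (invoked at the end of each parse) logs them instead of raising. A sketch, with SQL that is malformed on purpose; the exact error text depends on the dialect:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"  # missing table name
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)  # logs, does not raise
print(parser.errors)  # the accumulated ParseError instances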
1507 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1508 """ 1509 Appends an error in the list of recorded errors or raises it, depending on the chosen 1510 error level setting. 1511 """ 1512 token = token or self._curr or self._prev or Token.string("") 1513 start = token.start 1514 end = token.end + 1 1515 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1516 highlight = self.sql[start:end] 1517 end_context = self.sql[end : end + self.error_message_context] 1518 1519 error = ParseError.new( 1520 f"{message}. Line {token.line}, Col: {token.col}.\n" 1521 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1522 description=message, 1523 line=token.line, 1524 col=token.col, 1525 start_context=start_context, 1526 highlight=highlight, 1527 end_context=end_context, 1528 ) 1529 1530 if self.error_level == ErrorLevel.IMMEDIATE: 1531 raise error 1532 1533 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
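With the default ErrorLevel.IMMEDIATE, the first error raises right away, and the structured fields set by ParseError.new can be inspected on the exception. A sketch:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"  # missing table name
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    info = e.errors[0]
    print(info["line"], info["col"], info["description"])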
1535 def expression( 1536 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1537 ) -> E: 1538 """ 1539 Creates a new, validated Expression. 1540 1541 Args: 1542 exp_class: The expression class to instantiate. 1543 comments: An optional list of comments to attach to the expression. 1544 kwargs: The arguments to set for the expression along with their respective values. 1545 1546 Returns: 1547 The target expression. 1548 """ 1549 instance = exp_class(**kwargs) 1550 instance.add_comments(comments) if comments else self._add_comments(instance) 1551 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
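expression is normally called from the _parse_* methods so that pending comments are attached and validation runs immediately; a direct-call sketch:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
column = parser.expression(exp.Column, this=exp.to_identifier("a"))
assert column.sql() == "a"
# Omitting a mandatory argument (e.g. exp.Column with no `this`) would
# surface through validate_expression/raise_error instead.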
1558 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1559 """ 1560 Validates an Expression, making sure that all its mandatory arguments are set. 1561 1562 Args: 1563 expression: The expression to validate. 1564 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1565 1566 Returns: 1567 The validated expression. 1568 """ 1569 if self.error_level != ErrorLevel.IGNORE: 1570 for error_message in expression.error_messages(args): 1571 self.raise_error(error_message) 1572 1573 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
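A sketch mirroring how _parse_substring uses this above: build a Func from a raw argument list, then validate that list against the expression's mandatory arguments. Under any error level other than IGNORE, each message from error_messages is routed through raise_error.

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
args = [exp.column("s"), exp.Literal.number(2)]  # SUBSTRING(s, 2)
substring = exp.Substring.from_arg_list(args)
validated = parser.validate_expression(substring, args)
assert validated.sql() == "SUBSTRING(s, 2)"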