sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120def build_array_constructor( 121 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 122) -> exp.Expression: 123 array_exp = exp_class(expressions=args) 124 125 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 126 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 127 128 return array_exp 129 130 131def build_convert_timezone( 132 args: t.List, default_source_tz: t.Optional[str] = None 133) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 134 if len(args) == 2: 135 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 136 return exp.ConvertTimezone( 137 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 138 ) 139 140 return exp.ConvertTimezone.from_arg_list(args) 141 142 143def build_trim(args: t.List, is_left: bool = True): 144 return exp.Trim( 145 this=seq_get(args, 0), 146 expression=seq_get(args, 1), 147 position="LEADING" if is_left else "TRAILING", 148 ) 149 150 151def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 152 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 153 154 155class _Parser(type): 156 def __new__(cls, clsname, bases, attrs): 157 klass = super().__new__(cls, clsname, bases, attrs) 158 159 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 160 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 161 162 return klass 163 164 165class Parser(metaclass=_Parser): 166 """ 167 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 168 169 Args: 170 error_level: The desired error level. 171 Default: ErrorLevel.IMMEDIATE 172 error_message_context: The amount of context to capture from a query string when displaying 173 the error message (in number of characters). 174 Default: 100 175 max_errors: Maximum number of error messages to include in a raised ParseError. 176 This is only relevant if error_level is ErrorLevel.RAISE. 177 Default: 3 178 """ 179 180 FUNCTIONS: t.Dict[str, t.Callable] = { 181 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 182 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 183 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 184 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 185 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 186 ), 187 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 188 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 189 ), 190 "CHAR": lambda args: exp.Chr(expressions=args), 191 "CHR": lambda args: exp.Chr(expressions=args), 192 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 193 "CONCAT": lambda args, dialect: exp.Concat( 194 expressions=args, 195 safe=not dialect.STRICT_STRING_CONCAT, 196 coalesce=dialect.CONCAT_COALESCE, 197 ), 198 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 199 expressions=args, 200 safe=not dialect.STRICT_STRING_CONCAT, 201 coalesce=dialect.CONCAT_COALESCE, 202 ), 203 "CONVERT_TIMEZONE": build_convert_timezone, 204 "DATE_TO_DATE_STR": lambda args: exp.Cast( 205 this=seq_get(args, 0), 206 to=exp.DataType(this=exp.DataType.Type.TEXT), 207 ), 208 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 209 start=seq_get(args, 0), 210 end=seq_get(args, 1), 211 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 212 ), 213 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 214 "HEX": build_hex, 215 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 TokenType.CURRENT_USER: exp.CurrentUser, 258 } 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 TokenType.UNION, 265 } 266 267 NESTED_TYPE_TOKENS = { 268 TokenType.ARRAY, 269 TokenType.LIST, 270 TokenType.LOWCARDINALITY, 271 TokenType.MAP, 272 TokenType.NULLABLE, 273 TokenType.RANGE, 274 *STRUCT_TYPE_TOKENS, 275 } 276 277 ENUM_TYPE_TOKENS = { 278 TokenType.ENUM, 279 TokenType.ENUM8, 280 TokenType.ENUM16, 281 } 282 283 AGGREGATE_TYPE_TOKENS = { 284 TokenType.AGGREGATEFUNCTION, 285 TokenType.SIMPLEAGGREGATEFUNCTION, 286 } 287 288 TYPE_TOKENS = { 289 TokenType.BIT, 290 TokenType.BOOLEAN, 291 TokenType.TINYINT, 292 TokenType.UTINYINT, 293 TokenType.SMALLINT, 294 TokenType.USMALLINT, 295 TokenType.INT, 296 TokenType.UINT, 297 TokenType.BIGINT, 298 TokenType.UBIGINT, 299 TokenType.INT128, 300 TokenType.UINT128, 301 TokenType.INT256, 302 TokenType.UINT256, 303 TokenType.MEDIUMINT, 304 TokenType.UMEDIUMINT, 305 TokenType.FIXEDSTRING, 306 TokenType.FLOAT, 307 TokenType.DOUBLE, 308 TokenType.CHAR, 309 TokenType.NCHAR, 310 TokenType.VARCHAR, 311 TokenType.NVARCHAR, 312 TokenType.BPCHAR, 313 TokenType.TEXT, 314 TokenType.MEDIUMTEXT, 315 TokenType.LONGTEXT, 316 TokenType.MEDIUMBLOB, 317 TokenType.LONGBLOB, 318 TokenType.BINARY, 319 TokenType.VARBINARY, 320 TokenType.JSON, 321 TokenType.JSONB, 322 TokenType.INTERVAL, 323 TokenType.TINYBLOB, 324 TokenType.TINYTEXT, 325 TokenType.TIME, 326 TokenType.TIMETZ, 327 TokenType.TIMESTAMP, 328 TokenType.TIMESTAMP_S, 329 TokenType.TIMESTAMP_MS, 330 TokenType.TIMESTAMP_NS, 331 TokenType.TIMESTAMPTZ, 332 TokenType.TIMESTAMPLTZ, 333 TokenType.TIMESTAMPNTZ, 334 TokenType.DATETIME, 335 TokenType.DATETIME64, 336 TokenType.DATE, 337 TokenType.DATE32, 338 TokenType.INT4RANGE, 339 TokenType.INT4MULTIRANGE, 340 TokenType.INT8RANGE, 341 TokenType.INT8MULTIRANGE, 342 TokenType.NUMRANGE, 343 TokenType.NUMMULTIRANGE, 344 TokenType.TSRANGE, 345 TokenType.TSMULTIRANGE, 346 TokenType.TSTZRANGE, 347 TokenType.TSTZMULTIRANGE, 348 TokenType.DATERANGE, 349 TokenType.DATEMULTIRANGE, 350 TokenType.DECIMAL, 351 TokenType.DECIMAL32, 352 TokenType.DECIMAL64, 353 TokenType.DECIMAL128, 354 TokenType.DECIMAL256, 355 TokenType.UDECIMAL, 356 TokenType.BIGDECIMAL, 357 TokenType.UUID, 358 TokenType.GEOGRAPHY, 359 TokenType.GEOMETRY, 360 TokenType.POINT, 361 TokenType.RING, 362 TokenType.LINESTRING, 363 TokenType.MULTILINESTRING, 364 TokenType.POLYGON, 365 TokenType.MULTIPOLYGON, 366 TokenType.HLLSKETCH, 367 TokenType.HSTORE, 368 TokenType.PSEUDO_TYPE, 369 TokenType.SUPER, 370 TokenType.SERIAL, 371 TokenType.SMALLSERIAL, 372 TokenType.BIGSERIAL, 373 TokenType.XML, 374 TokenType.YEAR, 375 TokenType.UNIQUEIDENTIFIER, 376 TokenType.USERDEFINED, 377 TokenType.MONEY, 378 TokenType.SMALLMONEY, 379 TokenType.ROWVERSION, 380 TokenType.IMAGE, 381 TokenType.VARIANT, 382 TokenType.VECTOR, 383 TokenType.OBJECT, 384 TokenType.OBJECT_IDENTIFIER, 385 TokenType.INET, 386 TokenType.IPADDRESS, 387 TokenType.IPPREFIX, 388 TokenType.IPV4, 389 TokenType.IPV6, 390 TokenType.UNKNOWN, 391 TokenType.NULL, 392 TokenType.NAME, 393 TokenType.TDIGEST, 394 *ENUM_TYPE_TOKENS, 395 *NESTED_TYPE_TOKENS, 396 *AGGREGATE_TYPE_TOKENS, 397 } 398 399 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 400 TokenType.BIGINT: TokenType.UBIGINT, 401 TokenType.INT: TokenType.UINT, 402 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 403 TokenType.SMALLINT: TokenType.USMALLINT, 404 TokenType.TINYINT: TokenType.UTINYINT, 405 TokenType.DECIMAL: TokenType.UDECIMAL, 406 } 407 408 SUBQUERY_PREDICATES = { 409 TokenType.ANY: exp.Any, 410 TokenType.ALL: exp.All, 411 TokenType.EXISTS: exp.Exists, 412 TokenType.SOME: exp.Any, 413 } 414 415 RESERVED_TOKENS = { 416 *Tokenizer.SINGLE_TOKENS.values(), 417 TokenType.SELECT, 418 } - {TokenType.IDENTIFIER} 419 420 DB_CREATABLES = { 421 TokenType.DATABASE, 422 TokenType.DICTIONARY, 423 TokenType.MODEL, 424 TokenType.SCHEMA, 425 TokenType.SEQUENCE, 426 TokenType.STORAGE_INTEGRATION, 427 TokenType.TABLE, 428 TokenType.TAG, 429 TokenType.VIEW, 430 TokenType.WAREHOUSE, 431 TokenType.STREAMLIT, 432 } 433 434 CREATABLES = { 435 TokenType.COLUMN, 436 TokenType.CONSTRAINT, 437 TokenType.FOREIGN_KEY, 438 TokenType.FUNCTION, 439 TokenType.INDEX, 440 TokenType.PROCEDURE, 441 *DB_CREATABLES, 442 } 443 444 ALTERABLES = { 445 TokenType.INDEX, 446 TokenType.TABLE, 447 TokenType.VIEW, 448 } 449 450 # Tokens that can represent identifiers 451 ID_VAR_TOKENS = { 452 TokenType.ALL, 453 TokenType.VAR, 454 TokenType.ANTI, 455 TokenType.APPLY, 456 TokenType.ASC, 457 TokenType.ASOF, 458 TokenType.AUTO_INCREMENT, 459 TokenType.BEGIN, 460 TokenType.BPCHAR, 461 TokenType.CACHE, 462 TokenType.CASE, 463 TokenType.COLLATE, 464 TokenType.COMMAND, 465 TokenType.COMMENT, 466 TokenType.COMMIT, 467 TokenType.CONSTRAINT, 468 TokenType.COPY, 469 TokenType.CUBE, 470 TokenType.DEFAULT, 471 TokenType.DELETE, 472 TokenType.DESC, 473 TokenType.DESCRIBE, 474 TokenType.DICTIONARY, 475 TokenType.DIV, 476 TokenType.END, 477 TokenType.EXECUTE, 478 TokenType.ESCAPE, 479 TokenType.FALSE, 480 TokenType.FIRST, 481 TokenType.FILTER, 482 TokenType.FINAL, 483 TokenType.FORMAT, 484 TokenType.FULL, 485 TokenType.IDENTIFIER, 486 TokenType.IS, 487 TokenType.ISNULL, 488 TokenType.INTERVAL, 489 TokenType.KEEP, 490 TokenType.KILL, 491 TokenType.LEFT, 492 TokenType.LOAD, 493 TokenType.MERGE, 494 TokenType.NATURAL, 495 TokenType.NEXT, 496 TokenType.OFFSET, 497 TokenType.OPERATOR, 498 TokenType.ORDINALITY, 499 TokenType.OVERLAPS, 500 TokenType.OVERWRITE, 501 TokenType.PARTITION, 502 TokenType.PERCENT, 503 TokenType.PIVOT, 504 TokenType.PRAGMA, 505 TokenType.RANGE, 506 TokenType.RECURSIVE, 507 TokenType.REFERENCES, 508 TokenType.REFRESH, 509 TokenType.RENAME, 510 TokenType.REPLACE, 511 TokenType.RIGHT, 512 TokenType.ROLLUP, 513 TokenType.ROW, 514 TokenType.ROWS, 515 TokenType.SEMI, 516 TokenType.SET, 517 TokenType.SETTINGS, 518 TokenType.SHOW, 519 TokenType.TEMPORARY, 520 TokenType.TOP, 521 TokenType.TRUE, 522 TokenType.TRUNCATE, 523 TokenType.UNIQUE, 524 TokenType.UNNEST, 525 TokenType.UNPIVOT, 526 TokenType.UPDATE, 527 TokenType.USE, 528 TokenType.VOLATILE, 529 TokenType.WINDOW, 530 *CREATABLES, 531 *SUBQUERY_PREDICATES, 532 *TYPE_TOKENS, 533 *NO_PAREN_FUNCTIONS, 534 } 535 ID_VAR_TOKENS.remove(TokenType.UNION) 536 537 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 538 539 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 540 TokenType.ANTI, 541 TokenType.APPLY, 542 TokenType.ASOF, 543 TokenType.FULL, 544 TokenType.LEFT, 545 TokenType.LOCK, 546 TokenType.NATURAL, 547 TokenType.OFFSET, 548 TokenType.RIGHT, 549 TokenType.SEMI, 550 TokenType.WINDOW, 551 } 552 553 ALIAS_TOKENS = ID_VAR_TOKENS 554 555 ARRAY_CONSTRUCTORS = { 556 "ARRAY": exp.Array, 557 "LIST": exp.List, 558 } 559 560 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 561 562 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 563 564 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 565 566 FUNC_TOKENS = { 567 TokenType.COLLATE, 568 TokenType.COMMAND, 569 TokenType.CURRENT_DATE, 570 TokenType.CURRENT_DATETIME, 571 TokenType.CURRENT_TIMESTAMP, 572 TokenType.CURRENT_TIME, 573 TokenType.CURRENT_USER, 574 TokenType.FILTER, 575 TokenType.FIRST, 576 TokenType.FORMAT, 577 TokenType.GLOB, 578 TokenType.IDENTIFIER, 579 TokenType.INDEX, 580 TokenType.ISNULL, 581 TokenType.ILIKE, 582 TokenType.INSERT, 583 TokenType.LIKE, 584 TokenType.MERGE, 585 TokenType.OFFSET, 586 TokenType.PRIMARY_KEY, 587 TokenType.RANGE, 588 TokenType.REPLACE, 589 TokenType.RLIKE, 590 TokenType.ROW, 591 TokenType.UNNEST, 592 TokenType.VAR, 593 TokenType.LEFT, 594 TokenType.RIGHT, 595 TokenType.SEQUENCE, 596 TokenType.DATE, 597 TokenType.DATETIME, 598 TokenType.TABLE, 599 TokenType.TIMESTAMP, 600 TokenType.TIMESTAMPTZ, 601 TokenType.TRUNCATE, 602 TokenType.WINDOW, 603 TokenType.XOR, 604 *TYPE_TOKENS, 605 *SUBQUERY_PREDICATES, 606 } 607 608 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 609 TokenType.AND: exp.And, 610 } 611 612 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 613 TokenType.COLON_EQ: exp.PropertyEQ, 614 } 615 616 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 617 TokenType.OR: exp.Or, 618 } 619 620 EQUALITY = { 621 TokenType.EQ: exp.EQ, 622 TokenType.NEQ: exp.NEQ, 623 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 624 } 625 626 COMPARISON = { 627 TokenType.GT: exp.GT, 628 TokenType.GTE: exp.GTE, 629 TokenType.LT: exp.LT, 630 TokenType.LTE: exp.LTE, 631 } 632 633 BITWISE = { 634 TokenType.AMP: exp.BitwiseAnd, 635 TokenType.CARET: exp.BitwiseXor, 636 TokenType.PIPE: exp.BitwiseOr, 637 } 638 639 TERM = { 640 TokenType.DASH: exp.Sub, 641 TokenType.PLUS: exp.Add, 642 TokenType.MOD: exp.Mod, 643 TokenType.COLLATE: exp.Collate, 644 } 645 646 FACTOR = { 647 TokenType.DIV: exp.IntDiv, 648 TokenType.LR_ARROW: exp.Distance, 649 TokenType.SLASH: exp.Div, 650 TokenType.STAR: exp.Mul, 651 } 652 653 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 654 655 TIMES = { 656 TokenType.TIME, 657 TokenType.TIMETZ, 658 } 659 660 TIMESTAMPS = { 661 TokenType.TIMESTAMP, 662 TokenType.TIMESTAMPTZ, 663 TokenType.TIMESTAMPLTZ, 664 *TIMES, 665 } 666 667 SET_OPERATIONS = { 668 TokenType.UNION, 669 TokenType.INTERSECT, 670 TokenType.EXCEPT, 671 } 672 673 JOIN_METHODS = { 674 TokenType.ASOF, 675 TokenType.NATURAL, 676 TokenType.POSITIONAL, 677 } 678 679 JOIN_SIDES = { 680 TokenType.LEFT, 681 TokenType.RIGHT, 682 TokenType.FULL, 683 } 684 685 JOIN_KINDS = { 686 TokenType.ANTI, 687 TokenType.CROSS, 688 TokenType.INNER, 689 TokenType.OUTER, 690 TokenType.SEMI, 691 TokenType.STRAIGHT_JOIN, 692 } 693 694 JOIN_HINTS: t.Set[str] = set() 695 696 LAMBDAS = { 697 TokenType.ARROW: lambda self, expressions: self.expression( 698 exp.Lambda, 699 this=self._replace_lambda( 700 self._parse_assignment(), 701 expressions, 702 ), 703 expressions=expressions, 704 ), 705 TokenType.FARROW: lambda self, expressions: self.expression( 706 exp.Kwarg, 707 this=exp.var(expressions[0].name), 708 expression=self._parse_assignment(), 709 ), 710 } 711 712 COLUMN_OPERATORS = { 713 TokenType.DOT: None, 714 TokenType.DCOLON: lambda self, this, to: self.expression( 715 exp.Cast if self.STRICT_CAST else exp.TryCast, 716 this=this, 717 to=to, 718 ), 719 TokenType.ARROW: lambda self, this, path: self.expression( 720 exp.JSONExtract, 721 this=this, 722 expression=self.dialect.to_json_path(path), 723 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 724 ), 725 TokenType.DARROW: lambda self, this, path: self.expression( 726 exp.JSONExtractScalar, 727 this=this, 728 expression=self.dialect.to_json_path(path), 729 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 730 ), 731 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 732 exp.JSONBExtract, 733 this=this, 734 expression=path, 735 ), 736 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 737 exp.JSONBExtractScalar, 738 this=this, 739 expression=path, 740 ), 741 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 742 exp.JSONBContains, 743 this=this, 744 expression=key, 745 ), 746 } 747 748 EXPRESSION_PARSERS = { 749 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 750 exp.Column: lambda self: self._parse_column(), 751 exp.Condition: lambda self: self._parse_assignment(), 752 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 753 exp.Expression: lambda self: self._parse_expression(), 754 exp.From: lambda self: self._parse_from(joins=True), 755 exp.Group: lambda self: self._parse_group(), 756 exp.Having: lambda self: self._parse_having(), 757 exp.Identifier: lambda self: self._parse_id_var(), 758 exp.Join: lambda self: self._parse_join(), 759 exp.Lambda: lambda self: self._parse_lambda(), 760 exp.Lateral: lambda self: self._parse_lateral(), 761 exp.Limit: lambda self: self._parse_limit(), 762 exp.Offset: lambda self: self._parse_offset(), 763 exp.Order: lambda self: self._parse_order(), 764 exp.Ordered: lambda self: self._parse_ordered(), 765 exp.Properties: lambda self: self._parse_properties(), 766 exp.Qualify: lambda self: self._parse_qualify(), 767 exp.Returning: lambda self: self._parse_returning(), 768 exp.Select: lambda self: self._parse_select(), 769 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 770 exp.Table: lambda self: self._parse_table_parts(), 771 exp.TableAlias: lambda self: self._parse_table_alias(), 772 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 773 exp.Where: lambda self: self._parse_where(), 774 exp.Window: lambda self: self._parse_named_window(), 775 exp.With: lambda self: self._parse_with(), 776 "JOIN_TYPE": lambda self: self._parse_join_parts(), 777 } 778 779 STATEMENT_PARSERS = { 780 TokenType.ALTER: lambda self: self._parse_alter(), 781 TokenType.BEGIN: lambda self: self._parse_transaction(), 782 TokenType.CACHE: lambda self: self._parse_cache(), 783 TokenType.COMMENT: lambda self: self._parse_comment(), 784 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 785 TokenType.COPY: lambda self: self._parse_copy(), 786 TokenType.CREATE: lambda self: self._parse_create(), 787 TokenType.DELETE: lambda self: self._parse_delete(), 788 TokenType.DESC: lambda self: self._parse_describe(), 789 TokenType.DESCRIBE: lambda self: self._parse_describe(), 790 TokenType.DROP: lambda self: self._parse_drop(), 791 TokenType.GRANT: lambda self: self._parse_grant(), 792 TokenType.INSERT: lambda self: self._parse_insert(), 793 TokenType.KILL: lambda self: self._parse_kill(), 794 TokenType.LOAD: lambda self: self._parse_load(), 795 TokenType.MERGE: lambda self: self._parse_merge(), 796 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 797 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 798 TokenType.REFRESH: lambda self: self._parse_refresh(), 799 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 800 TokenType.SET: lambda self: self._parse_set(), 801 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 802 TokenType.UNCACHE: lambda self: self._parse_uncache(), 803 TokenType.UPDATE: lambda self: self._parse_update(), 804 TokenType.USE: lambda self: self.expression( 805 exp.Use, 806 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 807 this=self._parse_table(schema=False), 808 ), 809 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 810 } 811 812 UNARY_PARSERS = { 813 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 814 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 815 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 816 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 817 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 818 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 819 } 820 821 STRING_PARSERS = { 822 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 823 exp.RawString, this=token.text 824 ), 825 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 826 exp.National, this=token.text 827 ), 828 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 829 TokenType.STRING: lambda self, token: self.expression( 830 exp.Literal, this=token.text, is_string=True 831 ), 832 TokenType.UNICODE_STRING: lambda self, token: self.expression( 833 exp.UnicodeString, 834 this=token.text, 835 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 836 ), 837 } 838 839 NUMERIC_PARSERS = { 840 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 841 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 842 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 843 TokenType.NUMBER: lambda self, token: self.expression( 844 exp.Literal, this=token.text, is_string=False 845 ), 846 } 847 848 PRIMARY_PARSERS = { 849 **STRING_PARSERS, 850 **NUMERIC_PARSERS, 851 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 852 TokenType.NULL: lambda self, _: self.expression(exp.Null), 853 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 854 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 855 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 856 TokenType.STAR: lambda self, _: self._parse_star_ops(), 857 } 858 859 PLACEHOLDER_PARSERS = { 860 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 861 TokenType.PARAMETER: lambda self: self._parse_parameter(), 862 TokenType.COLON: lambda self: ( 863 self.expression(exp.Placeholder, this=self._prev.text) 864 if self._match_set(self.ID_VAR_TOKENS) 865 else None 866 ), 867 } 868 869 RANGE_PARSERS = { 870 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 871 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 872 TokenType.GLOB: binary_range_parser(exp.Glob), 873 TokenType.ILIKE: binary_range_parser(exp.ILike), 874 TokenType.IN: lambda self, this: self._parse_in(this), 875 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 876 TokenType.IS: lambda self, this: self._parse_is(this), 877 TokenType.LIKE: binary_range_parser(exp.Like), 878 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 879 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 880 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 881 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 882 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 883 } 884 885 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 886 "ALLOWED_VALUES": lambda self: self.expression( 887 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 888 ), 889 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 890 "AUTO": lambda self: self._parse_auto_property(), 891 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 892 "BACKUP": lambda self: self.expression( 893 exp.BackupProperty, this=self._parse_var(any_token=True) 894 ), 895 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 896 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 897 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 898 "CHECKSUM": lambda self: self._parse_checksum(), 899 "CLUSTER BY": lambda self: self._parse_cluster(), 900 "CLUSTERED": lambda self: self._parse_clustered_by(), 901 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 902 exp.CollateProperty, **kwargs 903 ), 904 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 905 "CONTAINS": lambda self: self._parse_contains_property(), 906 "COPY": lambda self: self._parse_copy_property(), 907 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 908 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 909 "DEFINER": lambda self: self._parse_definer(), 910 "DETERMINISTIC": lambda self: self.expression( 911 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 912 ), 913 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 914 "DUPLICATE": lambda self: self._parse_duplicate(), 915 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 916 "DISTKEY": lambda self: self._parse_distkey(), 917 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 918 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 919 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 920 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 921 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 922 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 923 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 924 "FREESPACE": lambda self: self._parse_freespace(), 925 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 926 "HEAP": lambda self: self.expression(exp.HeapProperty), 927 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 928 "IMMUTABLE": lambda self: self.expression( 929 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 930 ), 931 "INHERITS": lambda self: self.expression( 932 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 933 ), 934 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 935 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 936 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 937 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 938 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 939 "LIKE": lambda self: self._parse_create_like(), 940 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 941 "LOCK": lambda self: self._parse_locking(), 942 "LOCKING": lambda self: self._parse_locking(), 943 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 944 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 945 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 946 "MODIFIES": lambda self: self._parse_modifies_property(), 947 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 948 "NO": lambda self: self._parse_no_property(), 949 "ON": lambda self: self._parse_on_property(), 950 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 951 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 952 "PARTITION": lambda self: self._parse_partitioned_of(), 953 "PARTITION BY": lambda self: self._parse_partitioned_by(), 954 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 955 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 956 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 957 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 958 "READS": lambda self: self._parse_reads_property(), 959 "REMOTE": lambda self: self._parse_remote_with_connection(), 960 "RETURNS": lambda self: self._parse_returns(), 961 "STRICT": lambda self: self.expression(exp.StrictProperty), 962 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 963 "ROW": lambda self: self._parse_row(), 964 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 965 "SAMPLE": lambda self: self.expression( 966 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 967 ), 968 "SECURE": lambda self: self.expression(exp.SecureProperty), 969 "SECURITY": lambda self: self._parse_security(), 970 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 971 "SETTINGS": lambda self: self._parse_settings_property(), 972 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 973 "SORTKEY": lambda self: self._parse_sortkey(), 974 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 975 "STABLE": lambda self: self.expression( 976 exp.StabilityProperty, this=exp.Literal.string("STABLE") 977 ), 978 "STORED": lambda self: self._parse_stored(), 979 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 980 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 981 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 982 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 983 "TO": lambda self: self._parse_to_table(), 984 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 985 "TRANSFORM": lambda self: self.expression( 986 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 987 ), 988 "TTL": lambda self: self._parse_ttl(), 989 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 990 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 991 "VOLATILE": lambda self: self._parse_volatile_property(), 992 "WITH": lambda self: self._parse_with_property(), 993 } 994 995 CONSTRAINT_PARSERS = { 996 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 997 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 998 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 999 "CHARACTER SET": lambda self: self.expression( 1000 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1001 ), 1002 "CHECK": lambda self: self.expression( 1003 exp.CheckColumnConstraint, 1004 this=self._parse_wrapped(self._parse_assignment), 1005 enforced=self._match_text_seq("ENFORCED"), 1006 ), 1007 "COLLATE": lambda self: self.expression( 1008 exp.CollateColumnConstraint, 1009 this=self._parse_identifier() or self._parse_column(), 1010 ), 1011 "COMMENT": lambda self: self.expression( 1012 exp.CommentColumnConstraint, this=self._parse_string() 1013 ), 1014 "COMPRESS": lambda self: self._parse_compress(), 1015 "CLUSTERED": lambda self: self.expression( 1016 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1017 ), 1018 "NONCLUSTERED": lambda self: self.expression( 1019 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1020 ), 1021 "DEFAULT": lambda self: self.expression( 1022 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1023 ), 1024 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1025 "EPHEMERAL": lambda self: self.expression( 1026 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1027 ), 1028 "EXCLUDE": lambda self: self.expression( 1029 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1030 ), 1031 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1032 "FORMAT": lambda self: self.expression( 1033 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1034 ), 1035 "GENERATED": lambda self: self._parse_generated_as_identity(), 1036 "IDENTITY": lambda self: self._parse_auto_increment(), 1037 "INLINE": lambda self: self._parse_inline(), 1038 "LIKE": lambda self: self._parse_create_like(), 1039 "NOT": lambda self: self._parse_not_constraint(), 1040 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1041 "ON": lambda self: ( 1042 self._match(TokenType.UPDATE) 1043 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1044 ) 1045 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1046 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1047 "PERIOD": lambda self: self._parse_period_for_system_time(), 1048 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1049 "REFERENCES": lambda self: self._parse_references(match=False), 1050 "TITLE": lambda self: self.expression( 1051 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1052 ), 1053 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1054 "UNIQUE": lambda self: self._parse_unique(), 1055 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1056 "WITH": lambda self: self.expression( 1057 exp.Properties, expressions=self._parse_wrapped_properties() 1058 ), 1059 } 1060 1061 ALTER_PARSERS = { 1062 "ADD": lambda self: self._parse_alter_table_add(), 1063 "AS": lambda self: self._parse_select(), 1064 "ALTER": lambda self: self._parse_alter_table_alter(), 1065 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1066 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1067 "DROP": lambda self: self._parse_alter_table_drop(), 1068 "RENAME": lambda self: self._parse_alter_table_rename(), 1069 "SET": lambda self: self._parse_alter_table_set(), 1070 "SWAP": lambda self: self.expression( 1071 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1072 ), 1073 } 1074 1075 ALTER_ALTER_PARSERS = { 1076 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1077 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1078 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1079 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1080 } 1081 1082 SCHEMA_UNNAMED_CONSTRAINTS = { 1083 "CHECK", 1084 "EXCLUDE", 1085 "FOREIGN KEY", 1086 "LIKE", 1087 "PERIOD", 1088 "PRIMARY KEY", 1089 "UNIQUE", 1090 } 1091 1092 NO_PAREN_FUNCTION_PARSERS = { 1093 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1094 "CASE": lambda self: self._parse_case(), 1095 "CONNECT_BY_ROOT": lambda self: self.expression( 1096 exp.ConnectByRoot, this=self._parse_column() 1097 ), 1098 "IF": lambda self: self._parse_if(), 1099 "NEXT": lambda self: self._parse_next_value_for(), 1100 } 1101 1102 INVALID_FUNC_NAME_TOKENS = { 1103 TokenType.IDENTIFIER, 1104 TokenType.STRING, 1105 } 1106 1107 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1108 1109 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1110 1111 FUNCTION_PARSERS = { 1112 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1113 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1114 "DECODE": lambda self: self._parse_decode(), 1115 "EXTRACT": lambda self: self._parse_extract(), 1116 "GAP_FILL": lambda self: self._parse_gap_fill(), 1117 "JSON_OBJECT": lambda self: self._parse_json_object(), 1118 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1119 "JSON_TABLE": lambda self: self._parse_json_table(), 1120 "MATCH": lambda self: self._parse_match_against(), 1121 "NORMALIZE": lambda self: self._parse_normalize(), 1122 "OPENJSON": lambda self: self._parse_open_json(), 1123 "OVERLAY": lambda self: self._parse_overlay(), 1124 "POSITION": lambda self: self._parse_position(), 1125 "PREDICT": lambda self: self._parse_predict(), 1126 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1127 "STRING_AGG": lambda self: self._parse_string_agg(), 1128 "SUBSTRING": lambda self: self._parse_substring(), 1129 "TRIM": lambda self: self._parse_trim(), 1130 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1131 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1132 } 1133 1134 QUERY_MODIFIER_PARSERS = { 1135 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1136 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1137 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1138 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1139 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1140 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1141 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1142 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1143 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1144 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1145 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1146 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1147 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1148 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1149 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1150 TokenType.CLUSTER_BY: lambda self: ( 1151 "cluster", 1152 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1153 ), 1154 TokenType.DISTRIBUTE_BY: lambda self: ( 1155 "distribute", 1156 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1157 ), 1158 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1159 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1160 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1161 } 1162 1163 SET_PARSERS = { 1164 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1165 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1166 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1167 "TRANSACTION": lambda self: self._parse_set_transaction(), 1168 } 1169 1170 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1171 1172 TYPE_LITERAL_PARSERS = { 1173 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1174 } 1175 1176 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1177 1178 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1179 1180 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1181 1182 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1183 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1184 "ISOLATION": ( 1185 ("LEVEL", "REPEATABLE", "READ"), 1186 ("LEVEL", "READ", "COMMITTED"), 1187 ("LEVEL", "READ", "UNCOMITTED"), 1188 ("LEVEL", "SERIALIZABLE"), 1189 ), 1190 "READ": ("WRITE", "ONLY"), 1191 } 1192 1193 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1194 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1195 ) 1196 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1197 1198 CREATE_SEQUENCE: OPTIONS_TYPE = { 1199 "SCALE": ("EXTEND", "NOEXTEND"), 1200 "SHARD": ("EXTEND", "NOEXTEND"), 1201 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1202 **dict.fromkeys( 1203 ( 1204 "SESSION", 1205 "GLOBAL", 1206 "KEEP", 1207 "NOKEEP", 1208 "ORDER", 1209 "NOORDER", 1210 "NOCACHE", 1211 "CYCLE", 1212 "NOCYCLE", 1213 "NOMINVALUE", 1214 "NOMAXVALUE", 1215 "NOSCALE", 1216 "NOSHARD", 1217 ), 1218 tuple(), 1219 ), 1220 } 1221 1222 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1223 1224 USABLES: OPTIONS_TYPE = dict.fromkeys( 1225 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1226 ) 1227 1228 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1229 1230 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1231 "TYPE": ("EVOLUTION",), 1232 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1233 } 1234 1235 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1236 1237 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1238 1239 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1240 "NOT": ("ENFORCED",), 1241 "MATCH": ( 1242 "FULL", 1243 "PARTIAL", 1244 "SIMPLE", 1245 ), 1246 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1247 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1248 } 1249 1250 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1251 1252 CLONE_KEYWORDS = {"CLONE", "COPY"} 1253 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1254 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1255 1256 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1257 1258 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1259 1260 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1261 1262 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1263 1264 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1265 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1266 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1267 1268 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1269 1270 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1271 1272 ADD_CONSTRAINT_TOKENS = { 1273 TokenType.CONSTRAINT, 1274 TokenType.FOREIGN_KEY, 1275 TokenType.INDEX, 1276 TokenType.KEY, 1277 TokenType.PRIMARY_KEY, 1278 TokenType.UNIQUE, 1279 } 1280 1281 DISTINCT_TOKENS = {TokenType.DISTINCT} 1282 1283 NULL_TOKENS = {TokenType.NULL} 1284 1285 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1286 1287 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1288 1289 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1290 1291 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1292 1293 ODBC_DATETIME_LITERALS = { 1294 "d": exp.Date, 1295 "t": exp.Time, 1296 "ts": exp.Timestamp, 1297 } 1298 1299 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1300 1301 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1302 1303 # The style options for the DESCRIBE statement 1304 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1305 1306 OPERATION_MODIFIERS: t.Set[str] = set() 1307 1308 STRICT_CAST = True 1309 1310 PREFIXED_PIVOT_COLUMNS = False 1311 IDENTIFY_PIVOT_STRINGS = False 1312 1313 LOG_DEFAULTS_TO_LN = False 1314 1315 # Whether ADD is present for each column added by ALTER TABLE 1316 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1317 1318 # Whether the table sample clause expects CSV syntax 1319 TABLESAMPLE_CSV = False 1320 1321 # The default method used for table sampling 1322 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1323 1324 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1325 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1326 1327 # Whether the TRIM function expects the characters to trim as its first argument 1328 TRIM_PATTERN_FIRST = False 1329 1330 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1331 STRING_ALIASES = False 1332 1333 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1334 MODIFIERS_ATTACHED_TO_SET_OP = True 1335 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1336 1337 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1338 NO_PAREN_IF_COMMANDS = True 1339 1340 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1341 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1342 1343 # Whether the `:` operator is used to extract a value from a VARIANT column 1344 COLON_IS_VARIANT_EXTRACT = False 1345 1346 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1347 # If this is True and '(' is not found, the keyword will be treated as an identifier 1348 VALUES_FOLLOWED_BY_PAREN = True 1349 1350 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1351 SUPPORTS_IMPLICIT_UNNEST = False 1352 1353 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1354 INTERVAL_SPANS = True 1355 1356 # Whether a PARTITION clause can follow a table reference 1357 SUPPORTS_PARTITION_SELECTION = False 1358 1359 __slots__ = ( 1360 "error_level", 1361 "error_message_context", 1362 "max_errors", 1363 "dialect", 1364 "sql", 1365 "errors", 1366 "_tokens", 1367 "_index", 1368 "_curr", 1369 "_next", 1370 "_prev", 1371 "_prev_comments", 1372 ) 1373 1374 # Autofilled 1375 SHOW_TRIE: t.Dict = {} 1376 SET_TRIE: t.Dict = {} 1377 1378 def __init__( 1379 self, 1380 error_level: t.Optional[ErrorLevel] = None, 1381 error_message_context: int = 100, 1382 max_errors: int = 3, 1383 dialect: DialectType = None, 1384 ): 1385 from sqlglot.dialects import Dialect 1386 1387 self.error_level = error_level or ErrorLevel.IMMEDIATE 1388 self.error_message_context = error_message_context 1389 self.max_errors = max_errors 1390 self.dialect = Dialect.get_or_raise(dialect) 1391 self.reset() 1392 1393 def reset(self): 1394 self.sql = "" 1395 self.errors = [] 1396 self._tokens = [] 1397 self._index = 0 1398 self._curr = None 1399 self._next = None 1400 self._prev = None 1401 self._prev_comments = None 1402 1403 def parse( 1404 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1405 ) -> t.List[t.Optional[exp.Expression]]: 1406 """ 1407 Parses a list of tokens and returns a list of syntax trees, one tree 1408 per parsed SQL statement. 1409 1410 Args: 1411 raw_tokens: The list of tokens. 1412 sql: The original SQL string, used to produce helpful debug messages. 1413 1414 Returns: 1415 The list of the produced syntax trees. 1416 """ 1417 return self._parse( 1418 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1419 ) 1420 1421 def parse_into( 1422 self, 1423 expression_types: exp.IntoType, 1424 raw_tokens: t.List[Token], 1425 sql: t.Optional[str] = None, 1426 ) -> t.List[t.Optional[exp.Expression]]: 1427 """ 1428 Parses a list of tokens into a given Expression type. If a collection of Expression 1429 types is given instead, this method will try to parse the token list into each one 1430 of them, stopping at the first for which the parsing succeeds. 1431 1432 Args: 1433 expression_types: The expression type(s) to try and parse the token list into. 1434 raw_tokens: The list of tokens. 1435 sql: The original SQL string, used to produce helpful debug messages. 1436 1437 Returns: 1438 The target Expression. 1439 """ 1440 errors = [] 1441 for expression_type in ensure_list(expression_types): 1442 parser = self.EXPRESSION_PARSERS.get(expression_type) 1443 if not parser: 1444 raise TypeError(f"No parser registered for {expression_type}") 1445 1446 try: 1447 return self._parse(parser, raw_tokens, sql) 1448 except ParseError as e: 1449 e.errors[0]["into_expression"] = expression_type 1450 errors.append(e) 1451 1452 raise ParseError( 1453 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1454 errors=merge_errors(errors), 1455 ) from errors[-1] 1456 1457 def _parse( 1458 self, 1459 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1460 raw_tokens: t.List[Token], 1461 sql: t.Optional[str] = None, 1462 ) -> t.List[t.Optional[exp.Expression]]: 1463 self.reset() 1464 self.sql = sql or "" 1465 1466 total = len(raw_tokens) 1467 chunks: t.List[t.List[Token]] = [[]] 1468 1469 for i, token in enumerate(raw_tokens): 1470 if token.token_type == TokenType.SEMICOLON: 1471 if token.comments: 1472 chunks.append([token]) 1473 1474 if i < total - 1: 1475 chunks.append([]) 1476 else: 1477 chunks[-1].append(token) 1478 1479 expressions = [] 1480 1481 for tokens in chunks: 1482 self._index = -1 1483 self._tokens = tokens 1484 self._advance() 1485 1486 expressions.append(parse_method(self)) 1487 1488 if self._index < len(self._tokens): 1489 self.raise_error("Invalid expression / Unexpected token") 1490 1491 self.check_errors() 1492 1493 return expressions 1494 1495 def check_errors(self) -> None: 1496 """Logs or raises any found errors, depending on the chosen error level setting.""" 1497 if self.error_level == ErrorLevel.WARN: 1498 for error in self.errors: 1499 logger.error(str(error)) 1500 elif self.error_level == ErrorLevel.RAISE and self.errors: 1501 raise ParseError( 1502 concat_messages(self.errors, self.max_errors), 1503 errors=merge_errors(self.errors), 1504 ) 1505 1506 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1507 """ 1508 Appends an error in the list of recorded errors or raises it, depending on the chosen 1509 error level setting. 1510 """ 1511 token = token or self._curr or self._prev or Token.string("") 1512 start = token.start 1513 end = token.end + 1 1514 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1515 highlight = self.sql[start:end] 1516 end_context = self.sql[end : end + self.error_message_context] 1517 1518 error = ParseError.new( 1519 f"{message}. Line {token.line}, Col: {token.col}.\n" 1520 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1521 description=message, 1522 line=token.line, 1523 col=token.col, 1524 start_context=start_context, 1525 highlight=highlight, 1526 end_context=end_context, 1527 ) 1528 1529 if self.error_level == ErrorLevel.IMMEDIATE: 1530 raise error 1531 1532 self.errors.append(error) 1533 1534 def expression( 1535 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1536 ) -> E: 1537 """ 1538 Creates a new, validated Expression. 1539 1540 Args: 1541 exp_class: The expression class to instantiate. 1542 comments: An optional list of comments to attach to the expression. 1543 kwargs: The arguments to set for the expression along with their respective values. 1544 1545 Returns: 1546 The target expression. 1547 """ 1548 instance = exp_class(**kwargs) 1549 instance.add_comments(comments) if comments else self._add_comments(instance) 1550 return self.validate_expression(instance) 1551 1552 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1553 if expression and self._prev_comments: 1554 expression.add_comments(self._prev_comments) 1555 self._prev_comments = None 1556 1557 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1558 """ 1559 Validates an Expression, making sure that all its mandatory arguments are set. 1560 1561 Args: 1562 expression: The expression to validate. 1563 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1564 1565 Returns: 1566 The validated expression. 1567 """ 1568 if self.error_level != ErrorLevel.IGNORE: 1569 for error_message in expression.error_messages(args): 1570 self.raise_error(error_message) 1571 1572 return expression 1573 1574 def _find_sql(self, start: Token, end: Token) -> str: 1575 return self.sql[start.start : end.end + 1] 1576 1577 def _is_connected(self) -> bool: 1578 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1579 1580 def _advance(self, times: int = 1) -> None: 1581 self._index += times 1582 self._curr = seq_get(self._tokens, self._index) 1583 self._next = seq_get(self._tokens, self._index + 1) 1584 1585 if self._index > 0: 1586 self._prev = self._tokens[self._index - 1] 1587 self._prev_comments = self._prev.comments 1588 else: 1589 self._prev = None 1590 self._prev_comments = None 1591 1592 def _retreat(self, index: int) -> None: 1593 if index != self._index: 1594 self._advance(index - self._index) 1595 1596 def _warn_unsupported(self) -> None: 1597 if len(self._tokens) <= 1: 1598 return 1599 1600 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1601 # interested in emitting a warning for the one being currently processed. 1602 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1603 1604 logger.warning( 1605 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1606 ) 1607 1608 def _parse_command(self) -> exp.Command: 1609 self._warn_unsupported() 1610 return self.expression( 1611 exp.Command, 1612 comments=self._prev_comments, 1613 this=self._prev.text.upper(), 1614 expression=self._parse_string(), 1615 ) 1616 1617 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1618 """ 1619 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1620 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1621 solve this by setting & resetting the parser state accordingly 1622 """ 1623 index = self._index 1624 error_level = self.error_level 1625 1626 self.error_level = ErrorLevel.IMMEDIATE 1627 try: 1628 this = parse_method() 1629 except ParseError: 1630 this = None 1631 finally: 1632 if not this or retreat: 1633 self._retreat(index) 1634 self.error_level = error_level 1635 1636 return this 1637 1638 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1639 start = self._prev 1640 exists = self._parse_exists() if allow_exists else None 1641 1642 self._match(TokenType.ON) 1643 1644 materialized = self._match_text_seq("MATERIALIZED") 1645 kind = self._match_set(self.CREATABLES) and self._prev 1646 if not kind: 1647 return self._parse_as_command(start) 1648 1649 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1650 this = self._parse_user_defined_function(kind=kind.token_type) 1651 elif kind.token_type == TokenType.TABLE: 1652 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1653 elif kind.token_type == TokenType.COLUMN: 1654 this = self._parse_column() 1655 else: 1656 this = self._parse_id_var() 1657 1658 self._match(TokenType.IS) 1659 1660 return self.expression( 1661 exp.Comment, 1662 this=this, 1663 kind=kind.text, 1664 expression=self._parse_string(), 1665 exists=exists, 1666 materialized=materialized, 1667 ) 1668 1669 def _parse_to_table( 1670 self, 1671 ) -> exp.ToTableProperty: 1672 table = self._parse_table_parts(schema=True) 1673 return self.expression(exp.ToTableProperty, this=table) 1674 1675 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1676 def _parse_ttl(self) -> exp.Expression: 1677 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1678 this = self._parse_bitwise() 1679 1680 if self._match_text_seq("DELETE"): 1681 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1682 if self._match_text_seq("RECOMPRESS"): 1683 return self.expression( 1684 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1685 ) 1686 if self._match_text_seq("TO", "DISK"): 1687 return self.expression( 1688 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1689 ) 1690 if self._match_text_seq("TO", "VOLUME"): 1691 return self.expression( 1692 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1693 ) 1694 1695 return this 1696 1697 expressions = self._parse_csv(_parse_ttl_action) 1698 where = self._parse_where() 1699 group = self._parse_group() 1700 1701 aggregates = None 1702 if group and self._match(TokenType.SET): 1703 aggregates = self._parse_csv(self._parse_set_item) 1704 1705 return self.expression( 1706 exp.MergeTreeTTL, 1707 expressions=expressions, 1708 where=where, 1709 group=group, 1710 aggregates=aggregates, 1711 ) 1712 1713 def _parse_statement(self) -> t.Optional[exp.Expression]: 1714 if self._curr is None: 1715 return None 1716 1717 if self._match_set(self.STATEMENT_PARSERS): 1718 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1719 1720 if self._match_set(self.dialect.tokenizer.COMMANDS): 1721 return self._parse_command() 1722 1723 expression = self._parse_expression() 1724 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1725 return self._parse_query_modifiers(expression) 1726 1727 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1728 start = self._prev 1729 temporary = self._match(TokenType.TEMPORARY) 1730 materialized = self._match_text_seq("MATERIALIZED") 1731 1732 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1733 if not kind: 1734 return self._parse_as_command(start) 1735 1736 concurrently = self._match_text_seq("CONCURRENTLY") 1737 if_exists = exists or self._parse_exists() 1738 table = self._parse_table_parts( 1739 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1740 ) 1741 1742 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1743 1744 if self._match(TokenType.L_PAREN, advance=False): 1745 expressions = self._parse_wrapped_csv(self._parse_types) 1746 else: 1747 expressions = None 1748 1749 return self.expression( 1750 exp.Drop, 1751 comments=start.comments, 1752 exists=if_exists, 1753 this=table, 1754 expressions=expressions, 1755 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1756 temporary=temporary, 1757 materialized=materialized, 1758 cascade=self._match_text_seq("CASCADE"), 1759 constraints=self._match_text_seq("CONSTRAINTS"), 1760 purge=self._match_text_seq("PURGE"), 1761 cluster=cluster, 1762 concurrently=concurrently, 1763 ) 1764 1765 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1766 return ( 1767 self._match_text_seq("IF") 1768 and (not not_ or self._match(TokenType.NOT)) 1769 and self._match(TokenType.EXISTS) 1770 ) 1771 1772 def _parse_create(self) -> exp.Create | exp.Command: 1773 # Note: this can't be None because we've matched a statement parser 1774 start = self._prev 1775 comments = self._prev_comments 1776 1777 replace = ( 1778 start.token_type == TokenType.REPLACE 1779 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1780 or self._match_pair(TokenType.OR, TokenType.ALTER) 1781 ) 1782 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1783 1784 unique = self._match(TokenType.UNIQUE) 1785 1786 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1787 clustered = True 1788 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1789 "COLUMNSTORE" 1790 ): 1791 clustered = False 1792 else: 1793 clustered = None 1794 1795 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1796 self._advance() 1797 1798 properties = None 1799 create_token = self._match_set(self.CREATABLES) and self._prev 1800 1801 if not create_token: 1802 # exp.Properties.Location.POST_CREATE 1803 properties = self._parse_properties() 1804 create_token = self._match_set(self.CREATABLES) and self._prev 1805 1806 if not properties or not create_token: 1807 return self._parse_as_command(start) 1808 1809 concurrently = self._match_text_seq("CONCURRENTLY") 1810 exists = self._parse_exists(not_=True) 1811 this = None 1812 expression: t.Optional[exp.Expression] = None 1813 indexes = None 1814 no_schema_binding = None 1815 begin = None 1816 end = None 1817 clone = None 1818 1819 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1820 nonlocal properties 1821 if properties and temp_props: 1822 properties.expressions.extend(temp_props.expressions) 1823 elif temp_props: 1824 properties = temp_props 1825 1826 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1827 this = self._parse_user_defined_function(kind=create_token.token_type) 1828 1829 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1830 extend_props(self._parse_properties()) 1831 1832 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1833 extend_props(self._parse_properties()) 1834 1835 if not expression: 1836 if self._match(TokenType.COMMAND): 1837 expression = self._parse_as_command(self._prev) 1838 else: 1839 begin = self._match(TokenType.BEGIN) 1840 return_ = self._match_text_seq("RETURN") 1841 1842 if self._match(TokenType.STRING, advance=False): 1843 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1844 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1845 expression = self._parse_string() 1846 extend_props(self._parse_properties()) 1847 else: 1848 expression = self._parse_user_defined_function_expression() 1849 1850 end = self._match_text_seq("END") 1851 1852 if return_: 1853 expression = self.expression(exp.Return, this=expression) 1854 elif create_token.token_type == TokenType.INDEX: 1855 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1856 if not self._match(TokenType.ON): 1857 index = self._parse_id_var() 1858 anonymous = False 1859 else: 1860 index = None 1861 anonymous = True 1862 1863 this = self._parse_index(index=index, anonymous=anonymous) 1864 elif create_token.token_type in self.DB_CREATABLES: 1865 table_parts = self._parse_table_parts( 1866 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1867 ) 1868 1869 # exp.Properties.Location.POST_NAME 1870 self._match(TokenType.COMMA) 1871 extend_props(self._parse_properties(before=True)) 1872 1873 this = self._parse_schema(this=table_parts) 1874 1875 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1876 extend_props(self._parse_properties()) 1877 1878 self._match(TokenType.ALIAS) 1879 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1880 # exp.Properties.Location.POST_ALIAS 1881 extend_props(self._parse_properties()) 1882 1883 if create_token.token_type == TokenType.SEQUENCE: 1884 expression = self._parse_types() 1885 extend_props(self._parse_properties()) 1886 else: 1887 expression = self._parse_ddl_select() 1888 1889 if create_token.token_type == TokenType.TABLE: 1890 # exp.Properties.Location.POST_EXPRESSION 1891 extend_props(self._parse_properties()) 1892 1893 indexes = [] 1894 while True: 1895 index = self._parse_index() 1896 1897 # exp.Properties.Location.POST_INDEX 1898 extend_props(self._parse_properties()) 1899 if not index: 1900 break 1901 else: 1902 self._match(TokenType.COMMA) 1903 indexes.append(index) 1904 elif create_token.token_type == TokenType.VIEW: 1905 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1906 no_schema_binding = True 1907 1908 shallow = self._match_text_seq("SHALLOW") 1909 1910 if self._match_texts(self.CLONE_KEYWORDS): 1911 copy = self._prev.text.lower() == "copy" 1912 clone = self.expression( 1913 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1914 ) 1915 1916 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1917 return self._parse_as_command(start) 1918 1919 create_kind_text = create_token.text.upper() 1920 return self.expression( 1921 exp.Create, 1922 comments=comments, 1923 this=this, 1924 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1925 replace=replace, 1926 refresh=refresh, 1927 unique=unique, 1928 expression=expression, 1929 exists=exists, 1930 properties=properties, 1931 indexes=indexes, 1932 no_schema_binding=no_schema_binding, 1933 begin=begin, 1934 end=end, 1935 clone=clone, 1936 concurrently=concurrently, 1937 clustered=clustered, 1938 ) 1939 1940 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1941 seq = exp.SequenceProperties() 1942 1943 options = [] 1944 index = self._index 1945 1946 while self._curr: 1947 self._match(TokenType.COMMA) 1948 if self._match_text_seq("INCREMENT"): 1949 self._match_text_seq("BY") 1950 self._match_text_seq("=") 1951 seq.set("increment", self._parse_term()) 1952 elif self._match_text_seq("MINVALUE"): 1953 seq.set("minvalue", self._parse_term()) 1954 elif self._match_text_seq("MAXVALUE"): 1955 seq.set("maxvalue", self._parse_term()) 1956 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1957 self._match_text_seq("=") 1958 seq.set("start", self._parse_term()) 1959 elif self._match_text_seq("CACHE"): 1960 # T-SQL allows empty CACHE which is initialized dynamically 1961 seq.set("cache", self._parse_number() or True) 1962 elif self._match_text_seq("OWNED", "BY"): 1963 # "OWNED BY NONE" is the default 1964 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1965 else: 1966 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1967 if opt: 1968 options.append(opt) 1969 else: 1970 break 1971 1972 seq.set("options", options if options else None) 1973 return None if self._index == index else seq 1974 1975 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1976 # only used for teradata currently 1977 self._match(TokenType.COMMA) 1978 1979 kwargs = { 1980 "no": self._match_text_seq("NO"), 1981 "dual": self._match_text_seq("DUAL"), 1982 "before": self._match_text_seq("BEFORE"), 1983 "default": self._match_text_seq("DEFAULT"), 1984 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1985 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1986 "after": self._match_text_seq("AFTER"), 1987 "minimum": self._match_texts(("MIN", "MINIMUM")), 1988 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1989 } 1990 1991 if self._match_texts(self.PROPERTY_PARSERS): 1992 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1993 try: 1994 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1995 except TypeError: 1996 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1997 1998 return None 1999 2000 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2001 return self._parse_wrapped_csv(self._parse_property) 2002 2003 def _parse_property(self) -> t.Optional[exp.Expression]: 2004 if self._match_texts(self.PROPERTY_PARSERS): 2005 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2006 2007 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2008 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2009 2010 if self._match_text_seq("COMPOUND", "SORTKEY"): 2011 return self._parse_sortkey(compound=True) 2012 2013 if self._match_text_seq("SQL", "SECURITY"): 2014 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2015 2016 index = self._index 2017 key = self._parse_column() 2018 2019 if not self._match(TokenType.EQ): 2020 self._retreat(index) 2021 return self._parse_sequence_properties() 2022 2023 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2024 if isinstance(key, exp.Column): 2025 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2026 2027 value = self._parse_bitwise() or self._parse_var(any_token=True) 2028 2029 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2030 if isinstance(value, exp.Column): 2031 value = exp.var(value.name) 2032 2033 return self.expression(exp.Property, this=key, value=value) 2034 2035 def _parse_stored(self) -> exp.FileFormatProperty: 2036 self._match(TokenType.ALIAS) 2037 2038 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2039 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2040 2041 return self.expression( 2042 exp.FileFormatProperty, 2043 this=( 2044 self.expression( 2045 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2046 ) 2047 if input_format or output_format 2048 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2049 ), 2050 ) 2051 2052 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2053 field = self._parse_field() 2054 if isinstance(field, exp.Identifier) and not field.quoted: 2055 field = exp.var(field) 2056 2057 return field 2058 2059 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2060 self._match(TokenType.EQ) 2061 self._match(TokenType.ALIAS) 2062 2063 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2064 2065 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2066 properties = [] 2067 while True: 2068 if before: 2069 prop = self._parse_property_before() 2070 else: 2071 prop = self._parse_property() 2072 if not prop: 2073 break 2074 for p in ensure_list(prop): 2075 properties.append(p) 2076 2077 if properties: 2078 return self.expression(exp.Properties, expressions=properties) 2079 2080 return None 2081 2082 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2083 return self.expression( 2084 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2085 ) 2086 2087 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2088 if self._match_texts(("DEFINER", "INVOKER")): 2089 security_specifier = self._prev.text.upper() 2090 return self.expression(exp.SecurityProperty, this=security_specifier) 2091 return None 2092 2093 def _parse_settings_property(self) -> exp.SettingsProperty: 2094 return self.expression( 2095 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2096 ) 2097 2098 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2099 if self._index >= 2: 2100 pre_volatile_token = self._tokens[self._index - 2] 2101 else: 2102 pre_volatile_token = None 2103 2104 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2105 return exp.VolatileProperty() 2106 2107 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2108 2109 def _parse_retention_period(self) -> exp.Var: 2110 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2111 number = self._parse_number() 2112 number_str = f"{number} " if number else "" 2113 unit = self._parse_var(any_token=True) 2114 return exp.var(f"{number_str}{unit}") 2115 2116 def _parse_system_versioning_property( 2117 self, with_: bool = False 2118 ) -> exp.WithSystemVersioningProperty: 2119 self._match(TokenType.EQ) 2120 prop = self.expression( 2121 exp.WithSystemVersioningProperty, 2122 **{ # type: ignore 2123 "on": True, 2124 "with": with_, 2125 }, 2126 ) 2127 2128 if self._match_text_seq("OFF"): 2129 prop.set("on", False) 2130 return prop 2131 2132 self._match(TokenType.ON) 2133 if self._match(TokenType.L_PAREN): 2134 while self._curr and not self._match(TokenType.R_PAREN): 2135 if self._match_text_seq("HISTORY_TABLE", "="): 2136 prop.set("this", self._parse_table_parts()) 2137 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2138 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2139 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2140 prop.set("retention_period", self._parse_retention_period()) 2141 2142 self._match(TokenType.COMMA) 2143 2144 return prop 2145 2146 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2147 self._match(TokenType.EQ) 2148 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2149 prop = self.expression(exp.DataDeletionProperty, on=on) 2150 2151 if self._match(TokenType.L_PAREN): 2152 while self._curr and not self._match(TokenType.R_PAREN): 2153 if self._match_text_seq("FILTER_COLUMN", "="): 2154 prop.set("filter_column", self._parse_column()) 2155 elif self._match_text_seq("RETENTION_PERIOD", "="): 2156 prop.set("retention_period", self._parse_retention_period()) 2157 2158 self._match(TokenType.COMMA) 2159 2160 return prop 2161 2162 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2163 kind = "HASH" 2164 expressions: t.Optional[t.List[exp.Expression]] = None 2165 if self._match_text_seq("BY", "HASH"): 2166 expressions = self._parse_wrapped_csv(self._parse_id_var) 2167 elif self._match_text_seq("BY", "RANDOM"): 2168 kind = "RANDOM" 2169 2170 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2171 buckets: t.Optional[exp.Expression] = None 2172 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2173 buckets = self._parse_number() 2174 2175 return self.expression( 2176 exp.DistributedByProperty, 2177 expressions=expressions, 2178 kind=kind, 2179 buckets=buckets, 2180 order=self._parse_order(), 2181 ) 2182 2183 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2184 self._match_text_seq("KEY") 2185 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2186 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2187 2188 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2189 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2190 prop = self._parse_system_versioning_property(with_=True) 2191 self._match_r_paren() 2192 return prop 2193 2194 if self._match(TokenType.L_PAREN, advance=False): 2195 return self._parse_wrapped_properties() 2196 2197 if self._match_text_seq("JOURNAL"): 2198 return self._parse_withjournaltable() 2199 2200 if self._match_texts(self.VIEW_ATTRIBUTES): 2201 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2202 2203 if self._match_text_seq("DATA"): 2204 return self._parse_withdata(no=False) 2205 elif self._match_text_seq("NO", "DATA"): 2206 return self._parse_withdata(no=True) 2207 2208 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2209 return self._parse_serde_properties(with_=True) 2210 2211 if self._match(TokenType.SCHEMA): 2212 return self.expression( 2213 exp.WithSchemaBindingProperty, 2214 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2215 ) 2216 2217 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2218 return self.expression( 2219 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2220 ) 2221 2222 if not self._next: 2223 return None 2224 2225 return self._parse_withisolatedloading() 2226 2227 def _parse_procedure_option(self) -> exp.Expression | None: 2228 if self._match_text_seq("EXECUTE", "AS"): 2229 return self.expression( 2230 exp.ExecuteAsProperty, 2231 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2232 or self._parse_string(), 2233 ) 2234 2235 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2236 2237 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2238 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2239 self._match(TokenType.EQ) 2240 2241 user = self._parse_id_var() 2242 self._match(TokenType.PARAMETER) 2243 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2244 2245 if not user or not host: 2246 return None 2247 2248 return exp.DefinerProperty(this=f"{user}@{host}") 2249 2250 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2251 self._match(TokenType.TABLE) 2252 self._match(TokenType.EQ) 2253 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2254 2255 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2256 return self.expression(exp.LogProperty, no=no) 2257 2258 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2259 return self.expression(exp.JournalProperty, **kwargs) 2260 2261 def _parse_checksum(self) -> exp.ChecksumProperty: 2262 self._match(TokenType.EQ) 2263 2264 on = None 2265 if self._match(TokenType.ON): 2266 on = True 2267 elif self._match_text_seq("OFF"): 2268 on = False 2269 2270 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2271 2272 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2273 return self.expression( 2274 exp.Cluster, 2275 expressions=( 2276 self._parse_wrapped_csv(self._parse_ordered) 2277 if wrapped 2278 else self._parse_csv(self._parse_ordered) 2279 ), 2280 ) 2281 2282 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2283 self._match_text_seq("BY") 2284 2285 self._match_l_paren() 2286 expressions = self._parse_csv(self._parse_column) 2287 self._match_r_paren() 2288 2289 if self._match_text_seq("SORTED", "BY"): 2290 self._match_l_paren() 2291 sorted_by = self._parse_csv(self._parse_ordered) 2292 self._match_r_paren() 2293 else: 2294 sorted_by = None 2295 2296 self._match(TokenType.INTO) 2297 buckets = self._parse_number() 2298 self._match_text_seq("BUCKETS") 2299 2300 return self.expression( 2301 exp.ClusteredByProperty, 2302 expressions=expressions, 2303 sorted_by=sorted_by, 2304 buckets=buckets, 2305 ) 2306 2307 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2308 if not self._match_text_seq("GRANTS"): 2309 self._retreat(self._index - 1) 2310 return None 2311 2312 return self.expression(exp.CopyGrantsProperty) 2313 2314 def _parse_freespace(self) -> exp.FreespaceProperty: 2315 self._match(TokenType.EQ) 2316 return self.expression( 2317 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2318 ) 2319 2320 def _parse_mergeblockratio( 2321 self, no: bool = False, default: bool = False 2322 ) -> exp.MergeBlockRatioProperty: 2323 if self._match(TokenType.EQ): 2324 return self.expression( 2325 exp.MergeBlockRatioProperty, 2326 this=self._parse_number(), 2327 percent=self._match(TokenType.PERCENT), 2328 ) 2329 2330 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2331 2332 def _parse_datablocksize( 2333 self, 2334 default: t.Optional[bool] = None, 2335 minimum: t.Optional[bool] = None, 2336 maximum: t.Optional[bool] = None, 2337 ) -> exp.DataBlocksizeProperty: 2338 self._match(TokenType.EQ) 2339 size = self._parse_number() 2340 2341 units = None 2342 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2343 units = self._prev.text 2344 2345 return self.expression( 2346 exp.DataBlocksizeProperty, 2347 size=size, 2348 units=units, 2349 default=default, 2350 minimum=minimum, 2351 maximum=maximum, 2352 ) 2353 2354 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2355 self._match(TokenType.EQ) 2356 always = self._match_text_seq("ALWAYS") 2357 manual = self._match_text_seq("MANUAL") 2358 never = self._match_text_seq("NEVER") 2359 default = self._match_text_seq("DEFAULT") 2360 2361 autotemp = None 2362 if self._match_text_seq("AUTOTEMP"): 2363 autotemp = self._parse_schema() 2364 2365 return self.expression( 2366 exp.BlockCompressionProperty, 2367 always=always, 2368 manual=manual, 2369 never=never, 2370 default=default, 2371 autotemp=autotemp, 2372 ) 2373 2374 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2375 index = self._index 2376 no = self._match_text_seq("NO") 2377 concurrent = self._match_text_seq("CONCURRENT") 2378 2379 if not self._match_text_seq("ISOLATED", "LOADING"): 2380 self._retreat(index) 2381 return None 2382 2383 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2384 return self.expression( 2385 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2386 ) 2387 2388 def _parse_locking(self) -> exp.LockingProperty: 2389 if self._match(TokenType.TABLE): 2390 kind = "TABLE" 2391 elif self._match(TokenType.VIEW): 2392 kind = "VIEW" 2393 elif self._match(TokenType.ROW): 2394 kind = "ROW" 2395 elif self._match_text_seq("DATABASE"): 2396 kind = "DATABASE" 2397 else: 2398 kind = None 2399 2400 if kind in ("DATABASE", "TABLE", "VIEW"): 2401 this = self._parse_table_parts() 2402 else: 2403 this = None 2404 2405 if self._match(TokenType.FOR): 2406 for_or_in = "FOR" 2407 elif self._match(TokenType.IN): 2408 for_or_in = "IN" 2409 else: 2410 for_or_in = None 2411 2412 if self._match_text_seq("ACCESS"): 2413 lock_type = "ACCESS" 2414 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2415 lock_type = "EXCLUSIVE" 2416 elif self._match_text_seq("SHARE"): 2417 lock_type = "SHARE" 2418 elif self._match_text_seq("READ"): 2419 lock_type = "READ" 2420 elif self._match_text_seq("WRITE"): 2421 lock_type = "WRITE" 2422 elif self._match_text_seq("CHECKSUM"): 2423 lock_type = "CHECKSUM" 2424 else: 2425 lock_type = None 2426 2427 override = self._match_text_seq("OVERRIDE") 2428 2429 return self.expression( 2430 exp.LockingProperty, 2431 this=this, 2432 kind=kind, 2433 for_or_in=for_or_in, 2434 lock_type=lock_type, 2435 override=override, 2436 ) 2437 2438 def _parse_partition_by(self) -> t.List[exp.Expression]: 2439 if self._match(TokenType.PARTITION_BY): 2440 return self._parse_csv(self._parse_assignment) 2441 return [] 2442 2443 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2444 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2445 if self._match_text_seq("MINVALUE"): 2446 return exp.var("MINVALUE") 2447 if self._match_text_seq("MAXVALUE"): 2448 return exp.var("MAXVALUE") 2449 return self._parse_bitwise() 2450 2451 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2452 expression = None 2453 from_expressions = None 2454 to_expressions = None 2455 2456 if self._match(TokenType.IN): 2457 this = self._parse_wrapped_csv(self._parse_bitwise) 2458 elif self._match(TokenType.FROM): 2459 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2460 self._match_text_seq("TO") 2461 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2462 elif self._match_text_seq("WITH", "(", "MODULUS"): 2463 this = self._parse_number() 2464 self._match_text_seq(",", "REMAINDER") 2465 expression = self._parse_number() 2466 self._match_r_paren() 2467 else: 2468 self.raise_error("Failed to parse partition bound spec.") 2469 2470 return self.expression( 2471 exp.PartitionBoundSpec, 2472 this=this, 2473 expression=expression, 2474 from_expressions=from_expressions, 2475 to_expressions=to_expressions, 2476 ) 2477 2478 # https://www.postgresql.org/docs/current/sql-createtable.html 2479 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2480 if not self._match_text_seq("OF"): 2481 self._retreat(self._index - 1) 2482 return None 2483 2484 this = self._parse_table(schema=True) 2485 2486 if self._match(TokenType.DEFAULT): 2487 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2488 elif self._match_text_seq("FOR", "VALUES"): 2489 expression = self._parse_partition_bound_spec() 2490 else: 2491 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2492 2493 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2494 2495 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2496 self._match(TokenType.EQ) 2497 return self.expression( 2498 exp.PartitionedByProperty, 2499 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2500 ) 2501 2502 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2503 if self._match_text_seq("AND", "STATISTICS"): 2504 statistics = True 2505 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2506 statistics = False 2507 else: 2508 statistics = None 2509 2510 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2511 2512 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2513 if self._match_text_seq("SQL"): 2514 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2515 return None 2516 2517 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2518 if self._match_text_seq("SQL", "DATA"): 2519 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2520 return None 2521 2522 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2523 if self._match_text_seq("PRIMARY", "INDEX"): 2524 return exp.NoPrimaryIndexProperty() 2525 if self._match_text_seq("SQL"): 2526 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2527 return None 2528 2529 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2530 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2531 return exp.OnCommitProperty() 2532 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2533 return exp.OnCommitProperty(delete=True) 2534 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2535 2536 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2537 if self._match_text_seq("SQL", "DATA"): 2538 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2539 return None 2540 2541 def _parse_distkey(self) -> exp.DistKeyProperty: 2542 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2543 2544 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2545 table = self._parse_table(schema=True) 2546 2547 options = [] 2548 while self._match_texts(("INCLUDING", "EXCLUDING")): 2549 this = self._prev.text.upper() 2550 2551 id_var = self._parse_id_var() 2552 if not id_var: 2553 return None 2554 2555 options.append( 2556 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2557 ) 2558 2559 return self.expression(exp.LikeProperty, this=table, expressions=options) 2560 2561 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2562 return self.expression( 2563 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2564 ) 2565 2566 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2567 self._match(TokenType.EQ) 2568 return self.expression( 2569 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2570 ) 2571 2572 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2573 self._match_text_seq("WITH", "CONNECTION") 2574 return self.expression( 2575 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2576 ) 2577 2578 def _parse_returns(self) -> exp.ReturnsProperty: 2579 value: t.Optional[exp.Expression] 2580 null = None 2581 is_table = self._match(TokenType.TABLE) 2582 2583 if is_table: 2584 if self._match(TokenType.LT): 2585 value = self.expression( 2586 exp.Schema, 2587 this="TABLE", 2588 expressions=self._parse_csv(self._parse_struct_types), 2589 ) 2590 if not self._match(TokenType.GT): 2591 self.raise_error("Expecting >") 2592 else: 2593 value = self._parse_schema(exp.var("TABLE")) 2594 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2595 null = True 2596 value = None 2597 else: 2598 value = self._parse_types() 2599 2600 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2601 2602 def _parse_describe(self) -> exp.Describe: 2603 kind = self._match_set(self.CREATABLES) and self._prev.text 2604 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2605 if self._match(TokenType.DOT): 2606 style = None 2607 self._retreat(self._index - 2) 2608 this = self._parse_table(schema=True) 2609 properties = self._parse_properties() 2610 expressions = properties.expressions if properties else None 2611 partition = self._parse_partition() 2612 return self.expression( 2613 exp.Describe, 2614 this=this, 2615 style=style, 2616 kind=kind, 2617 expressions=expressions, 2618 partition=partition, 2619 ) 2620 2621 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2622 kind = self._prev.text.upper() 2623 expressions = [] 2624 2625 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2626 if self._match(TokenType.WHEN): 2627 expression = self._parse_disjunction() 2628 self._match(TokenType.THEN) 2629 else: 2630 expression = None 2631 2632 else_ = self._match(TokenType.ELSE) 2633 2634 if not self._match(TokenType.INTO): 2635 return None 2636 2637 return self.expression( 2638 exp.ConditionalInsert, 2639 this=self.expression( 2640 exp.Insert, 2641 this=self._parse_table(schema=True), 2642 expression=self._parse_derived_table_values(), 2643 ), 2644 expression=expression, 2645 else_=else_, 2646 ) 2647 2648 expression = parse_conditional_insert() 2649 while expression is not None: 2650 expressions.append(expression) 2651 expression = parse_conditional_insert() 2652 2653 return self.expression( 2654 exp.MultitableInserts, 2655 kind=kind, 2656 comments=comments, 2657 expressions=expressions, 2658 source=self._parse_table(), 2659 ) 2660 2661 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2662 comments = ensure_list(self._prev_comments) 2663 hint = self._parse_hint() 2664 overwrite = self._match(TokenType.OVERWRITE) 2665 ignore = self._match(TokenType.IGNORE) 2666 local = self._match_text_seq("LOCAL") 2667 alternative = None 2668 is_function = None 2669 2670 if self._match_text_seq("DIRECTORY"): 2671 this: t.Optional[exp.Expression] = self.expression( 2672 exp.Directory, 2673 this=self._parse_var_or_string(), 2674 local=local, 2675 row_format=self._parse_row_format(match_row=True), 2676 ) 2677 else: 2678 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2679 comments += ensure_list(self._prev_comments) 2680 return self._parse_multitable_inserts(comments) 2681 2682 if self._match(TokenType.OR): 2683 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2684 2685 self._match(TokenType.INTO) 2686 comments += ensure_list(self._prev_comments) 2687 self._match(TokenType.TABLE) 2688 is_function = self._match(TokenType.FUNCTION) 2689 2690 this = ( 2691 self._parse_table(schema=True, parse_partition=True) 2692 if not is_function 2693 else self._parse_function() 2694 ) 2695 2696 returning = self._parse_returning() 2697 2698 return self.expression( 2699 exp.Insert, 2700 comments=comments, 2701 hint=hint, 2702 is_function=is_function, 2703 this=this, 2704 stored=self._match_text_seq("STORED") and self._parse_stored(), 2705 by_name=self._match_text_seq("BY", "NAME"), 2706 exists=self._parse_exists(), 2707 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2708 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2709 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2710 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2711 conflict=self._parse_on_conflict(), 2712 returning=returning or self._parse_returning(), 2713 overwrite=overwrite, 2714 alternative=alternative, 2715 ignore=ignore, 2716 source=self._match(TokenType.TABLE) and self._parse_table(), 2717 ) 2718 2719 def _parse_kill(self) -> exp.Kill: 2720 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2721 2722 return self.expression( 2723 exp.Kill, 2724 this=self._parse_primary(), 2725 kind=kind, 2726 ) 2727 2728 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2729 conflict = self._match_text_seq("ON", "CONFLICT") 2730 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2731 2732 if not conflict and not duplicate: 2733 return None 2734 2735 conflict_keys = None 2736 constraint = None 2737 2738 if conflict: 2739 if self._match_text_seq("ON", "CONSTRAINT"): 2740 constraint = self._parse_id_var() 2741 elif self._match(TokenType.L_PAREN): 2742 conflict_keys = self._parse_csv(self._parse_id_var) 2743 self._match_r_paren() 2744 2745 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2746 if self._prev.token_type == TokenType.UPDATE: 2747 self._match(TokenType.SET) 2748 expressions = self._parse_csv(self._parse_equality) 2749 else: 2750 expressions = None 2751 2752 return self.expression( 2753 exp.OnConflict, 2754 duplicate=duplicate, 2755 expressions=expressions, 2756 action=action, 2757 conflict_keys=conflict_keys, 2758 constraint=constraint, 2759 ) 2760 2761 def _parse_returning(self) -> t.Optional[exp.Returning]: 2762 if not self._match(TokenType.RETURNING): 2763 return None 2764 return self.expression( 2765 exp.Returning, 2766 expressions=self._parse_csv(self._parse_expression), 2767 into=self._match(TokenType.INTO) and self._parse_table_part(), 2768 ) 2769 2770 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2771 if not self._match(TokenType.FORMAT): 2772 return None 2773 return self._parse_row_format() 2774 2775 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2776 index = self._index 2777 with_ = with_ or self._match_text_seq("WITH") 2778 2779 if not self._match(TokenType.SERDE_PROPERTIES): 2780 self._retreat(index) 2781 return None 2782 return self.expression( 2783 exp.SerdeProperties, 2784 **{ # type: ignore 2785 "expressions": self._parse_wrapped_properties(), 2786 "with": with_, 2787 }, 2788 ) 2789 2790 def _parse_row_format( 2791 self, match_row: bool = False 2792 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2793 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2794 return None 2795 2796 if self._match_text_seq("SERDE"): 2797 this = self._parse_string() 2798 2799 serde_properties = self._parse_serde_properties() 2800 2801 return self.expression( 2802 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2803 ) 2804 2805 self._match_text_seq("DELIMITED") 2806 2807 kwargs = {} 2808 2809 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2810 kwargs["fields"] = self._parse_string() 2811 if self._match_text_seq("ESCAPED", "BY"): 2812 kwargs["escaped"] = self._parse_string() 2813 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2814 kwargs["collection_items"] = self._parse_string() 2815 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2816 kwargs["map_keys"] = self._parse_string() 2817 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2818 kwargs["lines"] = self._parse_string() 2819 if self._match_text_seq("NULL", "DEFINED", "AS"): 2820 kwargs["null"] = self._parse_string() 2821 2822 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2823 2824 def _parse_load(self) -> exp.LoadData | exp.Command: 2825 if self._match_text_seq("DATA"): 2826 local = self._match_text_seq("LOCAL") 2827 self._match_text_seq("INPATH") 2828 inpath = self._parse_string() 2829 overwrite = self._match(TokenType.OVERWRITE) 2830 self._match_pair(TokenType.INTO, TokenType.TABLE) 2831 2832 return self.expression( 2833 exp.LoadData, 2834 this=self._parse_table(schema=True), 2835 local=local, 2836 overwrite=overwrite, 2837 inpath=inpath, 2838 partition=self._parse_partition(), 2839 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2840 serde=self._match_text_seq("SERDE") and self._parse_string(), 2841 ) 2842 return self._parse_as_command(self._prev) 2843 2844 def _parse_delete(self) -> exp.Delete: 2845 # This handles MySQL's "Multiple-Table Syntax" 2846 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2847 tables = None 2848 comments = self._prev_comments 2849 if not self._match(TokenType.FROM, advance=False): 2850 tables = self._parse_csv(self._parse_table) or None 2851 2852 returning = self._parse_returning() 2853 2854 return self.expression( 2855 exp.Delete, 2856 comments=comments, 2857 tables=tables, 2858 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2859 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2860 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2861 where=self._parse_where(), 2862 returning=returning or self._parse_returning(), 2863 limit=self._parse_limit(), 2864 ) 2865 2866 def _parse_update(self) -> exp.Update: 2867 comments = self._prev_comments 2868 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2869 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2870 returning = self._parse_returning() 2871 return self.expression( 2872 exp.Update, 2873 comments=comments, 2874 **{ # type: ignore 2875 "this": this, 2876 "expressions": expressions, 2877 "from": self._parse_from(joins=True), 2878 "where": self._parse_where(), 2879 "returning": returning or self._parse_returning(), 2880 "order": self._parse_order(), 2881 "limit": self._parse_limit(), 2882 }, 2883 ) 2884 2885 def _parse_uncache(self) -> exp.Uncache: 2886 if not self._match(TokenType.TABLE): 2887 self.raise_error("Expecting TABLE after UNCACHE") 2888 2889 return self.expression( 2890 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2891 ) 2892 2893 def _parse_cache(self) -> exp.Cache: 2894 lazy = self._match_text_seq("LAZY") 2895 self._match(TokenType.TABLE) 2896 table = self._parse_table(schema=True) 2897 2898 options = [] 2899 if self._match_text_seq("OPTIONS"): 2900 self._match_l_paren() 2901 k = self._parse_string() 2902 self._match(TokenType.EQ) 2903 v = self._parse_string() 2904 options = [k, v] 2905 self._match_r_paren() 2906 2907 self._match(TokenType.ALIAS) 2908 return self.expression( 2909 exp.Cache, 2910 this=table, 2911 lazy=lazy, 2912 options=options, 2913 expression=self._parse_select(nested=True), 2914 ) 2915 2916 def _parse_partition(self) -> t.Optional[exp.Partition]: 2917 if not self._match(TokenType.PARTITION): 2918 return None 2919 2920 return self.expression( 2921 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2922 ) 2923 2924 def _parse_value(self) -> t.Optional[exp.Tuple]: 2925 if self._match(TokenType.L_PAREN): 2926 expressions = self._parse_csv(self._parse_expression) 2927 self._match_r_paren() 2928 return self.expression(exp.Tuple, expressions=expressions) 2929 2930 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2931 expression = self._parse_expression() 2932 if expression: 2933 return self.expression(exp.Tuple, expressions=[expression]) 2934 return None 2935 2936 def _parse_projections(self) -> t.List[exp.Expression]: 2937 return self._parse_expressions() 2938 2939 def _parse_select( 2940 self, 2941 nested: bool = False, 2942 table: bool = False, 2943 parse_subquery_alias: bool = True, 2944 parse_set_operation: bool = True, 2945 ) -> t.Optional[exp.Expression]: 2946 cte = self._parse_with() 2947 2948 if cte: 2949 this = self._parse_statement() 2950 2951 if not this: 2952 self.raise_error("Failed to parse any statement following CTE") 2953 return cte 2954 2955 if "with" in this.arg_types: 2956 this.set("with", cte) 2957 else: 2958 self.raise_error(f"{this.key} does not support CTE") 2959 this = cte 2960 2961 return this 2962 2963 # duckdb supports leading with FROM x 2964 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2965 2966 if self._match(TokenType.SELECT): 2967 comments = self._prev_comments 2968 2969 hint = self._parse_hint() 2970 2971 if self._next and not self._next.token_type == TokenType.DOT: 2972 all_ = self._match(TokenType.ALL) 2973 distinct = self._match_set(self.DISTINCT_TOKENS) 2974 else: 2975 all_, distinct = None, None 2976 2977 kind = ( 2978 self._match(TokenType.ALIAS) 2979 and self._match_texts(("STRUCT", "VALUE")) 2980 and self._prev.text.upper() 2981 ) 2982 2983 if distinct: 2984 distinct = self.expression( 2985 exp.Distinct, 2986 on=self._parse_value() if self._match(TokenType.ON) else None, 2987 ) 2988 2989 if all_ and distinct: 2990 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2991 2992 operation_modifiers = [] 2993 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2994 operation_modifiers.append(exp.var(self._prev.text.upper())) 2995 2996 limit = self._parse_limit(top=True) 2997 projections = self._parse_projections() 2998 2999 this = self.expression( 3000 exp.Select, 3001 kind=kind, 3002 hint=hint, 3003 distinct=distinct, 3004 expressions=projections, 3005 limit=limit, 3006 operation_modifiers=operation_modifiers or None, 3007 ) 3008 this.comments = comments 3009 3010 into = self._parse_into() 3011 if into: 3012 this.set("into", into) 3013 3014 if not from_: 3015 from_ = self._parse_from() 3016 3017 if from_: 3018 this.set("from", from_) 3019 3020 this = self._parse_query_modifiers(this) 3021 elif (table or nested) and self._match(TokenType.L_PAREN): 3022 if self._match(TokenType.PIVOT): 3023 this = self._parse_simplified_pivot() 3024 elif self._match(TokenType.FROM): 3025 this = exp.select("*").from_( 3026 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3027 ) 3028 else: 3029 this = ( 3030 self._parse_table() 3031 if table 3032 else self._parse_select(nested=True, parse_set_operation=False) 3033 ) 3034 3035 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3036 # in case a modifier (e.g. join) is following 3037 if table and isinstance(this, exp.Values) and this.alias: 3038 alias = this.args["alias"].pop() 3039 this = exp.Table(this=this, alias=alias) 3040 3041 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3042 3043 self._match_r_paren() 3044 3045 # We return early here so that the UNION isn't attached to the subquery by the 3046 # following call to _parse_set_operations, but instead becomes the parent node 3047 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3048 elif self._match(TokenType.VALUES, advance=False): 3049 this = self._parse_derived_table_values() 3050 elif from_: 3051 this = exp.select("*").from_(from_.this, copy=False) 3052 elif self._match(TokenType.SUMMARIZE): 3053 table = self._match(TokenType.TABLE) 3054 this = self._parse_select() or self._parse_string() or self._parse_table() 3055 return self.expression(exp.Summarize, this=this, table=table) 3056 elif self._match(TokenType.DESCRIBE): 3057 this = self._parse_describe() 3058 elif self._match_text_seq("STREAM"): 3059 this = self.expression(exp.Stream, this=self._parse_function()) 3060 else: 3061 this = None 3062 3063 return self._parse_set_operations(this) if parse_set_operation else this 3064 3065 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3066 if not skip_with_token and not self._match(TokenType.WITH): 3067 return None 3068 3069 comments = self._prev_comments 3070 recursive = self._match(TokenType.RECURSIVE) 3071 3072 last_comments = None 3073 expressions = [] 3074 while True: 3075 expressions.append(self._parse_cte()) 3076 if last_comments: 3077 expressions[-1].add_comments(last_comments) 3078 3079 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3080 break 3081 else: 3082 self._match(TokenType.WITH) 3083 3084 last_comments = self._prev_comments 3085 3086 return self.expression( 3087 exp.With, comments=comments, expressions=expressions, recursive=recursive 3088 ) 3089 3090 def _parse_cte(self) -> exp.CTE: 3091 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3092 if not alias or not alias.this: 3093 self.raise_error("Expected CTE to have alias") 3094 3095 self._match(TokenType.ALIAS) 3096 comments = self._prev_comments 3097 3098 if self._match_text_seq("NOT", "MATERIALIZED"): 3099 materialized = False 3100 elif self._match_text_seq("MATERIALIZED"): 3101 materialized = True 3102 else: 3103 materialized = None 3104 3105 return self.expression( 3106 exp.CTE, 3107 this=self._parse_wrapped(self._parse_statement), 3108 alias=alias, 3109 materialized=materialized, 3110 comments=comments, 3111 ) 3112 3113 def _parse_table_alias( 3114 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3115 ) -> t.Optional[exp.TableAlias]: 3116 any_token = self._match(TokenType.ALIAS) 3117 alias = ( 3118 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3119 or self._parse_string_as_identifier() 3120 ) 3121 3122 index = self._index 3123 if self._match(TokenType.L_PAREN): 3124 columns = self._parse_csv(self._parse_function_parameter) 3125 self._match_r_paren() if columns else self._retreat(index) 3126 else: 3127 columns = None 3128 3129 if not alias and not columns: 3130 return None 3131 3132 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3133 3134 # We bubble up comments from the Identifier to the TableAlias 3135 if isinstance(alias, exp.Identifier): 3136 table_alias.add_comments(alias.pop_comments()) 3137 3138 return table_alias 3139 3140 def _parse_subquery( 3141 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3142 ) -> t.Optional[exp.Subquery]: 3143 if not this: 3144 return None 3145 3146 return self.expression( 3147 exp.Subquery, 3148 this=this, 3149 pivots=self._parse_pivots(), 3150 alias=self._parse_table_alias() if parse_alias else None, 3151 sample=self._parse_table_sample(), 3152 ) 3153 3154 def _implicit_unnests_to_explicit(self, this: E) -> E: 3155 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3156 3157 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3158 for i, join in enumerate(this.args.get("joins") or []): 3159 table = join.this 3160 normalized_table = table.copy() 3161 normalized_table.meta["maybe_column"] = True 3162 normalized_table = _norm(normalized_table, dialect=self.dialect) 3163 3164 if isinstance(table, exp.Table) and not join.args.get("on"): 3165 if normalized_table.parts[0].name in refs: 3166 table_as_column = table.to_column() 3167 unnest = exp.Unnest(expressions=[table_as_column]) 3168 3169 # Table.to_column creates a parent Alias node that we want to convert to 3170 # a TableAlias and attach to the Unnest, so it matches the parser's output 3171 if isinstance(table.args.get("alias"), exp.TableAlias): 3172 table_as_column.replace(table_as_column.this) 3173 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3174 3175 table.replace(unnest) 3176 3177 refs.add(normalized_table.alias_or_name) 3178 3179 return this 3180 3181 def _parse_query_modifiers( 3182 self, this: t.Optional[exp.Expression] 3183 ) -> t.Optional[exp.Expression]: 3184 if isinstance(this, (exp.Query, exp.Table)): 3185 for join in self._parse_joins(): 3186 this.append("joins", join) 3187 for lateral in iter(self._parse_lateral, None): 3188 this.append("laterals", lateral) 3189 3190 while True: 3191 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3192 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3193 key, expression = parser(self) 3194 3195 if expression: 3196 this.set(key, expression) 3197 if key == "limit": 3198 offset = expression.args.pop("offset", None) 3199 3200 if offset: 3201 offset = exp.Offset(expression=offset) 3202 this.set("offset", offset) 3203 3204 limit_by_expressions = expression.expressions 3205 expression.set("expressions", None) 3206 offset.set("expressions", limit_by_expressions) 3207 continue 3208 break 3209 3210 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3211 this = self._implicit_unnests_to_explicit(this) 3212 3213 return this 3214 3215 def _parse_hint(self) -> t.Optional[exp.Hint]: 3216 if self._match(TokenType.HINT): 3217 hints = [] 3218 for hint in iter( 3219 lambda: self._parse_csv( 3220 lambda: self._parse_function() or self._parse_var(upper=True) 3221 ), 3222 [], 3223 ): 3224 hints.extend(hint) 3225 3226 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3227 self.raise_error("Expected */ after HINT") 3228 3229 return self.expression(exp.Hint, expressions=hints) 3230 3231 return None 3232 3233 def _parse_into(self) -> t.Optional[exp.Into]: 3234 if not self._match(TokenType.INTO): 3235 return None 3236 3237 temp = self._match(TokenType.TEMPORARY) 3238 unlogged = self._match_text_seq("UNLOGGED") 3239 self._match(TokenType.TABLE) 3240 3241 return self.expression( 3242 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3243 ) 3244 3245 def _parse_from( 3246 self, joins: bool = False, skip_from_token: bool = False 3247 ) -> t.Optional[exp.From]: 3248 if not skip_from_token and not self._match(TokenType.FROM): 3249 return None 3250 3251 return self.expression( 3252 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3253 ) 3254 3255 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3256 return self.expression( 3257 exp.MatchRecognizeMeasure, 3258 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3259 this=self._parse_expression(), 3260 ) 3261 3262 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3263 if not self._match(TokenType.MATCH_RECOGNIZE): 3264 return None 3265 3266 self._match_l_paren() 3267 3268 partition = self._parse_partition_by() 3269 order = self._parse_order() 3270 3271 measures = ( 3272 self._parse_csv(self._parse_match_recognize_measure) 3273 if self._match_text_seq("MEASURES") 3274 else None 3275 ) 3276 3277 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3278 rows = exp.var("ONE ROW PER MATCH") 3279 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3280 text = "ALL ROWS PER MATCH" 3281 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3282 text += " SHOW EMPTY MATCHES" 3283 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3284 text += " OMIT EMPTY MATCHES" 3285 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3286 text += " WITH UNMATCHED ROWS" 3287 rows = exp.var(text) 3288 else: 3289 rows = None 3290 3291 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3292 text = "AFTER MATCH SKIP" 3293 if self._match_text_seq("PAST", "LAST", "ROW"): 3294 text += " PAST LAST ROW" 3295 elif self._match_text_seq("TO", "NEXT", "ROW"): 3296 text += " TO NEXT ROW" 3297 elif self._match_text_seq("TO", "FIRST"): 3298 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3299 elif self._match_text_seq("TO", "LAST"): 3300 text += f" TO LAST {self._advance_any().text}" # type: ignore 3301 after = exp.var(text) 3302 else: 3303 after = None 3304 3305 if self._match_text_seq("PATTERN"): 3306 self._match_l_paren() 3307 3308 if not self._curr: 3309 self.raise_error("Expecting )", self._curr) 3310 3311 paren = 1 3312 start = self._curr 3313 3314 while self._curr and paren > 0: 3315 if self._curr.token_type == TokenType.L_PAREN: 3316 paren += 1 3317 if self._curr.token_type == TokenType.R_PAREN: 3318 paren -= 1 3319 3320 end = self._prev 3321 self._advance() 3322 3323 if paren > 0: 3324 self.raise_error("Expecting )", self._curr) 3325 3326 pattern = exp.var(self._find_sql(start, end)) 3327 else: 3328 pattern = None 3329 3330 define = ( 3331 self._parse_csv(self._parse_name_as_expression) 3332 if self._match_text_seq("DEFINE") 3333 else None 3334 ) 3335 3336 self._match_r_paren() 3337 3338 return self.expression( 3339 exp.MatchRecognize, 3340 partition_by=partition, 3341 order=order, 3342 measures=measures, 3343 rows=rows, 3344 after=after, 3345 pattern=pattern, 3346 define=define, 3347 alias=self._parse_table_alias(), 3348 ) 3349 3350 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3351 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3352 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3353 cross_apply = False 3354 3355 if cross_apply is not None: 3356 this = self._parse_select(table=True) 3357 view = None 3358 outer = None 3359 elif self._match(TokenType.LATERAL): 3360 this = self._parse_select(table=True) 3361 view = self._match(TokenType.VIEW) 3362 outer = self._match(TokenType.OUTER) 3363 else: 3364 return None 3365 3366 if not this: 3367 this = ( 3368 self._parse_unnest() 3369 or self._parse_function() 3370 or self._parse_id_var(any_token=False) 3371 ) 3372 3373 while self._match(TokenType.DOT): 3374 this = exp.Dot( 3375 this=this, 3376 expression=self._parse_function() or self._parse_id_var(any_token=False), 3377 ) 3378 3379 if view: 3380 table = self._parse_id_var(any_token=False) 3381 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3382 table_alias: t.Optional[exp.TableAlias] = self.expression( 3383 exp.TableAlias, this=table, columns=columns 3384 ) 3385 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3386 # We move the alias from the lateral's child node to the lateral itself 3387 table_alias = this.args["alias"].pop() 3388 else: 3389 table_alias = self._parse_table_alias() 3390 3391 return self.expression( 3392 exp.Lateral, 3393 this=this, 3394 view=view, 3395 outer=outer, 3396 alias=table_alias, 3397 cross_apply=cross_apply, 3398 ) 3399 3400 def _parse_join_parts( 3401 self, 3402 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3403 return ( 3404 self._match_set(self.JOIN_METHODS) and self._prev, 3405 self._match_set(self.JOIN_SIDES) and self._prev, 3406 self._match_set(self.JOIN_KINDS) and self._prev, 3407 ) 3408 3409 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3410 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3411 this = self._parse_column() 3412 if isinstance(this, exp.Column): 3413 return this.this 3414 return this 3415 3416 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3417 3418 def _parse_join( 3419 self, skip_join_token: bool = False, parse_bracket: bool = False 3420 ) -> t.Optional[exp.Join]: 3421 if self._match(TokenType.COMMA): 3422 return self.expression(exp.Join, this=self._parse_table()) 3423 3424 index = self._index 3425 method, side, kind = self._parse_join_parts() 3426 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3427 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3428 3429 if not skip_join_token and not join: 3430 self._retreat(index) 3431 kind = None 3432 method = None 3433 side = None 3434 3435 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3436 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3437 3438 if not skip_join_token and not join and not outer_apply and not cross_apply: 3439 return None 3440 3441 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3442 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3443 kwargs["expressions"] = self._parse_csv( 3444 lambda: self._parse_table(parse_bracket=parse_bracket) 3445 ) 3446 3447 if method: 3448 kwargs["method"] = method.text 3449 if side: 3450 kwargs["side"] = side.text 3451 if kind: 3452 kwargs["kind"] = kind.text 3453 if hint: 3454 kwargs["hint"] = hint 3455 3456 if self._match(TokenType.MATCH_CONDITION): 3457 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3458 3459 if self._match(TokenType.ON): 3460 kwargs["on"] = self._parse_assignment() 3461 elif self._match(TokenType.USING): 3462 kwargs["using"] = self._parse_using_identifiers() 3463 elif ( 3464 not (outer_apply or cross_apply) 3465 and not isinstance(kwargs["this"], exp.Unnest) 3466 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3467 ): 3468 index = self._index 3469 joins: t.Optional[list] = list(self._parse_joins()) 3470 3471 if joins and self._match(TokenType.ON): 3472 kwargs["on"] = self._parse_assignment() 3473 elif joins and self._match(TokenType.USING): 3474 kwargs["using"] = self._parse_using_identifiers() 3475 else: 3476 joins = None 3477 self._retreat(index) 3478 3479 kwargs["this"].set("joins", joins if joins else None) 3480 3481 comments = [c for token in (method, side, kind) if token for c in token.comments] 3482 return self.expression(exp.Join, comments=comments, **kwargs) 3483 3484 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3485 this = self._parse_assignment() 3486 3487 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3488 return this 3489 3490 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3491 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3492 3493 return this 3494 3495 def _parse_index_params(self) -> exp.IndexParameters: 3496 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3497 3498 if self._match(TokenType.L_PAREN, advance=False): 3499 columns = self._parse_wrapped_csv(self._parse_with_operator) 3500 else: 3501 columns = None 3502 3503 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3504 partition_by = self._parse_partition_by() 3505 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3506 tablespace = ( 3507 self._parse_var(any_token=True) 3508 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3509 else None 3510 ) 3511 where = self._parse_where() 3512 3513 on = self._parse_field() if self._match(TokenType.ON) else None 3514 3515 return self.expression( 3516 exp.IndexParameters, 3517 using=using, 3518 columns=columns, 3519 include=include, 3520 partition_by=partition_by, 3521 where=where, 3522 with_storage=with_storage, 3523 tablespace=tablespace, 3524 on=on, 3525 ) 3526 3527 def _parse_index( 3528 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3529 ) -> t.Optional[exp.Index]: 3530 if index or anonymous: 3531 unique = None 3532 primary = None 3533 amp = None 3534 3535 self._match(TokenType.ON) 3536 self._match(TokenType.TABLE) # hive 3537 table = self._parse_table_parts(schema=True) 3538 else: 3539 unique = self._match(TokenType.UNIQUE) 3540 primary = self._match_text_seq("PRIMARY") 3541 amp = self._match_text_seq("AMP") 3542 3543 if not self._match(TokenType.INDEX): 3544 return None 3545 3546 index = self._parse_id_var() 3547 table = None 3548 3549 params = self._parse_index_params() 3550 3551 return self.expression( 3552 exp.Index, 3553 this=index, 3554 table=table, 3555 unique=unique, 3556 primary=primary, 3557 amp=amp, 3558 params=params, 3559 ) 3560 3561 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3562 hints: t.List[exp.Expression] = [] 3563 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3564 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3565 hints.append( 3566 self.expression( 3567 exp.WithTableHint, 3568 expressions=self._parse_csv( 3569 lambda: self._parse_function() or self._parse_var(any_token=True) 3570 ), 3571 ) 3572 ) 3573 self._match_r_paren() 3574 else: 3575 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3576 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3577 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3578 3579 self._match_set((TokenType.INDEX, TokenType.KEY)) 3580 if self._match(TokenType.FOR): 3581 hint.set("target", self._advance_any() and self._prev.text.upper()) 3582 3583 hint.set("expressions", self._parse_wrapped_id_vars()) 3584 hints.append(hint) 3585 3586 return hints or None 3587 3588 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3589 return ( 3590 (not schema and self._parse_function(optional_parens=False)) 3591 or self._parse_id_var(any_token=False) 3592 or self._parse_string_as_identifier() 3593 or self._parse_placeholder() 3594 ) 3595 3596 def _parse_table_parts( 3597 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3598 ) -> exp.Table: 3599 catalog = None 3600 db = None 3601 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3602 3603 while self._match(TokenType.DOT): 3604 if catalog: 3605 # This allows nesting the table in arbitrarily many dot expressions if needed 3606 table = self.expression( 3607 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3608 ) 3609 else: 3610 catalog = db 3611 db = table 3612 # "" used for tsql FROM a..b case 3613 table = self._parse_table_part(schema=schema) or "" 3614 3615 if ( 3616 wildcard 3617 and self._is_connected() 3618 and (isinstance(table, exp.Identifier) or not table) 3619 and self._match(TokenType.STAR) 3620 ): 3621 if isinstance(table, exp.Identifier): 3622 table.args["this"] += "*" 3623 else: 3624 table = exp.Identifier(this="*") 3625 3626 # We bubble up comments from the Identifier to the Table 3627 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3628 3629 if is_db_reference: 3630 catalog = db 3631 db = table 3632 table = None 3633 3634 if not table and not is_db_reference: 3635 self.raise_error(f"Expected table name but got {self._curr}") 3636 if not db and is_db_reference: 3637 self.raise_error(f"Expected database name but got {self._curr}") 3638 3639 table = self.expression( 3640 exp.Table, 3641 comments=comments, 3642 this=table, 3643 db=db, 3644 catalog=catalog, 3645 ) 3646 3647 changes = self._parse_changes() 3648 if changes: 3649 table.set("changes", changes) 3650 3651 at_before = self._parse_historical_data() 3652 if at_before: 3653 table.set("when", at_before) 3654 3655 pivots = self._parse_pivots() 3656 if pivots: 3657 table.set("pivots", pivots) 3658 3659 return table 3660 3661 def _parse_table( 3662 self, 3663 schema: bool = False, 3664 joins: bool = False, 3665 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3666 parse_bracket: bool = False, 3667 is_db_reference: bool = False, 3668 parse_partition: bool = False, 3669 ) -> t.Optional[exp.Expression]: 3670 lateral = self._parse_lateral() 3671 if lateral: 3672 return lateral 3673 3674 unnest = self._parse_unnest() 3675 if unnest: 3676 return unnest 3677 3678 values = self._parse_derived_table_values() 3679 if values: 3680 return values 3681 3682 subquery = self._parse_select(table=True) 3683 if subquery: 3684 if not subquery.args.get("pivots"): 3685 subquery.set("pivots", self._parse_pivots()) 3686 return subquery 3687 3688 bracket = parse_bracket and self._parse_bracket(None) 3689 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3690 3691 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3692 self._parse_table 3693 ) 3694 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3695 3696 only = self._match(TokenType.ONLY) 3697 3698 this = t.cast( 3699 exp.Expression, 3700 bracket 3701 or rows_from 3702 or self._parse_bracket( 3703 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3704 ), 3705 ) 3706 3707 if only: 3708 this.set("only", only) 3709 3710 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3711 self._match_text_seq("*") 3712 3713 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3714 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3715 this.set("partition", self._parse_partition()) 3716 3717 if schema: 3718 return self._parse_schema(this=this) 3719 3720 version = self._parse_version() 3721 3722 if version: 3723 this.set("version", version) 3724 3725 if self.dialect.ALIAS_POST_TABLESAMPLE: 3726 this.set("sample", self._parse_table_sample()) 3727 3728 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3729 if alias: 3730 this.set("alias", alias) 3731 3732 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3733 return self.expression( 3734 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3735 ) 3736 3737 this.set("hints", self._parse_table_hints()) 3738 3739 if not this.args.get("pivots"): 3740 this.set("pivots", self._parse_pivots()) 3741 3742 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3743 this.set("sample", self._parse_table_sample()) 3744 3745 if joins: 3746 for join in self._parse_joins(): 3747 this.append("joins", join) 3748 3749 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3750 this.set("ordinality", True) 3751 this.set("alias", self._parse_table_alias()) 3752 3753 return this 3754 3755 def _parse_version(self) -> t.Optional[exp.Version]: 3756 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3757 this = "TIMESTAMP" 3758 elif self._match(TokenType.VERSION_SNAPSHOT): 3759 this = "VERSION" 3760 else: 3761 return None 3762 3763 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3764 kind = self._prev.text.upper() 3765 start = self._parse_bitwise() 3766 self._match_texts(("TO", "AND")) 3767 end = self._parse_bitwise() 3768 expression: t.Optional[exp.Expression] = self.expression( 3769 exp.Tuple, expressions=[start, end] 3770 ) 3771 elif self._match_text_seq("CONTAINED", "IN"): 3772 kind = "CONTAINED IN" 3773 expression = self.expression( 3774 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3775 ) 3776 elif self._match(TokenType.ALL): 3777 kind = "ALL" 3778 expression = None 3779 else: 3780 self._match_text_seq("AS", "OF") 3781 kind = "AS OF" 3782 expression = self._parse_type() 3783 3784 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3785 3786 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3787 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3788 index = self._index 3789 historical_data = None 3790 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3791 this = self._prev.text.upper() 3792 kind = ( 3793 self._match(TokenType.L_PAREN) 3794 and self._match_texts(self.HISTORICAL_DATA_KIND) 3795 and self._prev.text.upper() 3796 ) 3797 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3798 3799 if expression: 3800 self._match_r_paren() 3801 historical_data = self.expression( 3802 exp.HistoricalData, this=this, kind=kind, expression=expression 3803 ) 3804 else: 3805 self._retreat(index) 3806 3807 return historical_data 3808 3809 def _parse_changes(self) -> t.Optional[exp.Changes]: 3810 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3811 return None 3812 3813 information = self._parse_var(any_token=True) 3814 self._match_r_paren() 3815 3816 return self.expression( 3817 exp.Changes, 3818 information=information, 3819 at_before=self._parse_historical_data(), 3820 end=self._parse_historical_data(), 3821 ) 3822 3823 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3824 if not self._match(TokenType.UNNEST): 3825 return None 3826 3827 expressions = self._parse_wrapped_csv(self._parse_equality) 3828 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3829 3830 alias = self._parse_table_alias() if with_alias else None 3831 3832 if alias: 3833 if self.dialect.UNNEST_COLUMN_ONLY: 3834 if alias.args.get("columns"): 3835 self.raise_error("Unexpected extra column alias in unnest.") 3836 3837 alias.set("columns", [alias.this]) 3838 alias.set("this", None) 3839 3840 columns = alias.args.get("columns") or [] 3841 if offset and len(expressions) < len(columns): 3842 offset = columns.pop() 3843 3844 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3845 self._match(TokenType.ALIAS) 3846 offset = self._parse_id_var( 3847 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3848 ) or exp.to_identifier("offset") 3849 3850 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3851 3852 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3853 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3854 if not is_derived and not ( 3855 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3856 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3857 ): 3858 return None 3859 3860 expressions = self._parse_csv(self._parse_value) 3861 alias = self._parse_table_alias() 3862 3863 if is_derived: 3864 self._match_r_paren() 3865 3866 return self.expression( 3867 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3868 ) 3869 3870 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3871 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3872 as_modifier and self._match_text_seq("USING", "SAMPLE") 3873 ): 3874 return None 3875 3876 bucket_numerator = None 3877 bucket_denominator = None 3878 bucket_field = None 3879 percent = None 3880 size = None 3881 seed = None 3882 3883 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3884 matched_l_paren = self._match(TokenType.L_PAREN) 3885 3886 if self.TABLESAMPLE_CSV: 3887 num = None 3888 expressions = self._parse_csv(self._parse_primary) 3889 else: 3890 expressions = None 3891 num = ( 3892 self._parse_factor() 3893 if self._match(TokenType.NUMBER, advance=False) 3894 else self._parse_primary() or self._parse_placeholder() 3895 ) 3896 3897 if self._match_text_seq("BUCKET"): 3898 bucket_numerator = self._parse_number() 3899 self._match_text_seq("OUT", "OF") 3900 bucket_denominator = bucket_denominator = self._parse_number() 3901 self._match(TokenType.ON) 3902 bucket_field = self._parse_field() 3903 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3904 percent = num 3905 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3906 size = num 3907 else: 3908 percent = num 3909 3910 if matched_l_paren: 3911 self._match_r_paren() 3912 3913 if self._match(TokenType.L_PAREN): 3914 method = self._parse_var(upper=True) 3915 seed = self._match(TokenType.COMMA) and self._parse_number() 3916 self._match_r_paren() 3917 elif self._match_texts(("SEED", "REPEATABLE")): 3918 seed = self._parse_wrapped(self._parse_number) 3919 3920 if not method and self.DEFAULT_SAMPLING_METHOD: 3921 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3922 3923 return self.expression( 3924 exp.TableSample, 3925 expressions=expressions, 3926 method=method, 3927 bucket_numerator=bucket_numerator, 3928 bucket_denominator=bucket_denominator, 3929 bucket_field=bucket_field, 3930 percent=percent, 3931 size=size, 3932 seed=seed, 3933 ) 3934 3935 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3936 return list(iter(self._parse_pivot, None)) or None 3937 3938 def _parse_joins(self) -> t.Iterator[exp.Join]: 3939 return iter(self._parse_join, None) 3940 3941 # https://duckdb.org/docs/sql/statements/pivot 3942 def _parse_simplified_pivot(self) -> exp.Pivot: 3943 def _parse_on() -> t.Optional[exp.Expression]: 3944 this = self._parse_bitwise() 3945 return self._parse_in(this) if self._match(TokenType.IN) else this 3946 3947 this = self._parse_table() 3948 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3949 using = self._match(TokenType.USING) and self._parse_csv( 3950 lambda: self._parse_alias(self._parse_function()) 3951 ) 3952 group = self._parse_group() 3953 return self.expression( 3954 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3955 ) 3956 3957 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3958 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3959 this = self._parse_select_or_expression() 3960 3961 self._match(TokenType.ALIAS) 3962 alias = self._parse_bitwise() 3963 if alias: 3964 if isinstance(alias, exp.Column) and not alias.db: 3965 alias = alias.this 3966 return self.expression(exp.PivotAlias, this=this, alias=alias) 3967 3968 return this 3969 3970 value = self._parse_column() 3971 3972 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3973 self.raise_error("Expecting IN (") 3974 3975 if self._match(TokenType.ANY): 3976 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3977 else: 3978 exprs = self._parse_csv(_parse_aliased_expression) 3979 3980 self._match_r_paren() 3981 return self.expression(exp.In, this=value, expressions=exprs) 3982 3983 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3984 index = self._index 3985 include_nulls = None 3986 3987 if self._match(TokenType.PIVOT): 3988 unpivot = False 3989 elif self._match(TokenType.UNPIVOT): 3990 unpivot = True 3991 3992 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3993 if self._match_text_seq("INCLUDE", "NULLS"): 3994 include_nulls = True 3995 elif self._match_text_seq("EXCLUDE", "NULLS"): 3996 include_nulls = False 3997 else: 3998 return None 3999 4000 expressions = [] 4001 4002 if not self._match(TokenType.L_PAREN): 4003 self._retreat(index) 4004 return None 4005 4006 if unpivot: 4007 expressions = self._parse_csv(self._parse_column) 4008 else: 4009 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4010 4011 if not expressions: 4012 self.raise_error("Failed to parse PIVOT's aggregation list") 4013 4014 if not self._match(TokenType.FOR): 4015 self.raise_error("Expecting FOR") 4016 4017 field = self._parse_pivot_in() 4018 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4019 self._parse_bitwise 4020 ) 4021 4022 self._match_r_paren() 4023 4024 pivot = self.expression( 4025 exp.Pivot, 4026 expressions=expressions, 4027 field=field, 4028 unpivot=unpivot, 4029 include_nulls=include_nulls, 4030 default_on_null=default_on_null, 4031 ) 4032 4033 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4034 pivot.set("alias", self._parse_table_alias()) 4035 4036 if not unpivot: 4037 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4038 4039 columns: t.List[exp.Expression] = [] 4040 for fld in pivot.args["field"].expressions: 4041 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4042 for name in names: 4043 if self.PREFIXED_PIVOT_COLUMNS: 4044 name = f"{name}_{field_name}" if name else field_name 4045 else: 4046 name = f"{field_name}_{name}" if name else field_name 4047 4048 columns.append(exp.to_identifier(name)) 4049 4050 pivot.set("columns", columns) 4051 4052 return pivot 4053 4054 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4055 return [agg.alias for agg in aggregations] 4056 4057 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4058 if not skip_where_token and not self._match(TokenType.PREWHERE): 4059 return None 4060 4061 return self.expression( 4062 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4063 ) 4064 4065 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4066 if not skip_where_token and not self._match(TokenType.WHERE): 4067 return None 4068 4069 return self.expression( 4070 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4071 ) 4072 4073 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4074 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4075 return None 4076 4077 elements: t.Dict[str, t.Any] = defaultdict(list) 4078 4079 if self._match(TokenType.ALL): 4080 elements["all"] = True 4081 elif self._match(TokenType.DISTINCT): 4082 elements["all"] = False 4083 4084 while True: 4085 index = self._index 4086 4087 elements["expressions"].extend( 4088 self._parse_csv( 4089 lambda: None 4090 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4091 else self._parse_assignment() 4092 ) 4093 ) 4094 4095 before_with_index = self._index 4096 with_prefix = self._match(TokenType.WITH) 4097 4098 if self._match(TokenType.ROLLUP): 4099 elements["rollup"].append( 4100 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4101 ) 4102 elif self._match(TokenType.CUBE): 4103 elements["cube"].append( 4104 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4105 ) 4106 elif self._match(TokenType.GROUPING_SETS): 4107 elements["grouping_sets"].append( 4108 self.expression( 4109 exp.GroupingSets, 4110 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4111 ) 4112 ) 4113 elif self._match_text_seq("TOTALS"): 4114 elements["totals"] = True # type: ignore 4115 4116 if before_with_index <= self._index <= before_with_index + 1: 4117 self._retreat(before_with_index) 4118 break 4119 4120 if index == self._index: 4121 break 4122 4123 return self.expression(exp.Group, **elements) # type: ignore 4124 4125 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4126 return self.expression( 4127 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4128 ) 4129 4130 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4131 if self._match(TokenType.L_PAREN): 4132 grouping_set = self._parse_csv(self._parse_column) 4133 self._match_r_paren() 4134 return self.expression(exp.Tuple, expressions=grouping_set) 4135 4136 return self._parse_column() 4137 4138 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4139 if not skip_having_token and not self._match(TokenType.HAVING): 4140 return None 4141 return self.expression(exp.Having, this=self._parse_assignment()) 4142 4143 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4144 if not self._match(TokenType.QUALIFY): 4145 return None 4146 return self.expression(exp.Qualify, this=self._parse_assignment()) 4147 4148 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4149 if skip_start_token: 4150 start = None 4151 elif self._match(TokenType.START_WITH): 4152 start = self._parse_assignment() 4153 else: 4154 return None 4155 4156 self._match(TokenType.CONNECT_BY) 4157 nocycle = self._match_text_seq("NOCYCLE") 4158 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4159 exp.Prior, this=self._parse_bitwise() 4160 ) 4161 connect = self._parse_assignment() 4162 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4163 4164 if not start and self._match(TokenType.START_WITH): 4165 start = self._parse_assignment() 4166 4167 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4168 4169 def _parse_name_as_expression(self) -> exp.Alias: 4170 return self.expression( 4171 exp.Alias, 4172 alias=self._parse_id_var(any_token=True), 4173 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4174 ) 4175 4176 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4177 if self._match_text_seq("INTERPOLATE"): 4178 return self._parse_wrapped_csv(self._parse_name_as_expression) 4179 return None 4180 4181 def _parse_order( 4182 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4183 ) -> t.Optional[exp.Expression]: 4184 siblings = None 4185 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4186 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4187 return this 4188 4189 siblings = True 4190 4191 return self.expression( 4192 exp.Order, 4193 this=this, 4194 expressions=self._parse_csv(self._parse_ordered), 4195 siblings=siblings, 4196 ) 4197 4198 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4199 if not self._match(token): 4200 return None 4201 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4202 4203 def _parse_ordered( 4204 self, parse_method: t.Optional[t.Callable] = None 4205 ) -> t.Optional[exp.Ordered]: 4206 this = parse_method() if parse_method else self._parse_assignment() 4207 if not this: 4208 return None 4209 4210 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4211 this = exp.var("ALL") 4212 4213 asc = self._match(TokenType.ASC) 4214 desc = self._match(TokenType.DESC) or (asc and False) 4215 4216 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4217 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4218 4219 nulls_first = is_nulls_first or False 4220 explicitly_null_ordered = is_nulls_first or is_nulls_last 4221 4222 if ( 4223 not explicitly_null_ordered 4224 and ( 4225 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4226 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4227 ) 4228 and self.dialect.NULL_ORDERING != "nulls_are_last" 4229 ): 4230 nulls_first = True 4231 4232 if self._match_text_seq("WITH", "FILL"): 4233 with_fill = self.expression( 4234 exp.WithFill, 4235 **{ # type: ignore 4236 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4237 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4238 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4239 "interpolate": self._parse_interpolate(), 4240 }, 4241 ) 4242 else: 4243 with_fill = None 4244 4245 return self.expression( 4246 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4247 ) 4248 4249 def _parse_limit( 4250 self, 4251 this: t.Optional[exp.Expression] = None, 4252 top: bool = False, 4253 skip_limit_token: bool = False, 4254 ) -> t.Optional[exp.Expression]: 4255 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4256 comments = self._prev_comments 4257 if top: 4258 limit_paren = self._match(TokenType.L_PAREN) 4259 expression = self._parse_term() if limit_paren else self._parse_number() 4260 4261 if limit_paren: 4262 self._match_r_paren() 4263 else: 4264 expression = self._parse_term() 4265 4266 if self._match(TokenType.COMMA): 4267 offset = expression 4268 expression = self._parse_term() 4269 else: 4270 offset = None 4271 4272 limit_exp = self.expression( 4273 exp.Limit, 4274 this=this, 4275 expression=expression, 4276 offset=offset, 4277 comments=comments, 4278 expressions=self._parse_limit_by(), 4279 ) 4280 4281 return limit_exp 4282 4283 if self._match(TokenType.FETCH): 4284 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4285 direction = self._prev.text.upper() if direction else "FIRST" 4286 4287 count = self._parse_field(tokens=self.FETCH_TOKENS) 4288 percent = self._match(TokenType.PERCENT) 4289 4290 self._match_set((TokenType.ROW, TokenType.ROWS)) 4291 4292 only = self._match_text_seq("ONLY") 4293 with_ties = self._match_text_seq("WITH", "TIES") 4294 4295 if only and with_ties: 4296 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4297 4298 return self.expression( 4299 exp.Fetch, 4300 direction=direction, 4301 count=count, 4302 percent=percent, 4303 with_ties=with_ties, 4304 ) 4305 4306 return this 4307 4308 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4309 if not self._match(TokenType.OFFSET): 4310 return this 4311 4312 count = self._parse_term() 4313 self._match_set((TokenType.ROW, TokenType.ROWS)) 4314 4315 return self.expression( 4316 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4317 ) 4318 4319 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4320 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4321 4322 def _parse_locks(self) -> t.List[exp.Lock]: 4323 locks = [] 4324 while True: 4325 if self._match_text_seq("FOR", "UPDATE"): 4326 update = True 4327 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4328 "LOCK", "IN", "SHARE", "MODE" 4329 ): 4330 update = False 4331 else: 4332 break 4333 4334 expressions = None 4335 if self._match_text_seq("OF"): 4336 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4337 4338 wait: t.Optional[bool | exp.Expression] = None 4339 if self._match_text_seq("NOWAIT"): 4340 wait = True 4341 elif self._match_text_seq("WAIT"): 4342 wait = self._parse_primary() 4343 elif self._match_text_seq("SKIP", "LOCKED"): 4344 wait = False 4345 4346 locks.append( 4347 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4348 ) 4349 4350 return locks 4351 4352 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4353 while this and self._match_set(self.SET_OPERATIONS): 4354 token_type = self._prev.token_type 4355 4356 if token_type == TokenType.UNION: 4357 operation: t.Type[exp.SetOperation] = exp.Union 4358 elif token_type == TokenType.EXCEPT: 4359 operation = exp.Except 4360 else: 4361 operation = exp.Intersect 4362 4363 comments = self._prev.comments 4364 4365 if self._match(TokenType.DISTINCT): 4366 distinct: t.Optional[bool] = True 4367 elif self._match(TokenType.ALL): 4368 distinct = False 4369 else: 4370 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4371 if distinct is None: 4372 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4373 4374 by_name = self._match_text_seq("BY", "NAME") 4375 expression = self._parse_select(nested=True, parse_set_operation=False) 4376 4377 this = self.expression( 4378 operation, 4379 comments=comments, 4380 this=this, 4381 distinct=distinct, 4382 by_name=by_name, 4383 expression=expression, 4384 ) 4385 4386 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4387 expression = this.expression 4388 4389 if expression: 4390 for arg in self.SET_OP_MODIFIERS: 4391 expr = expression.args.get(arg) 4392 if expr: 4393 this.set(arg, expr.pop()) 4394 4395 return this 4396 4397 def _parse_expression(self) -> t.Optional[exp.Expression]: 4398 return self._parse_alias(self._parse_assignment()) 4399 4400 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4401 this = self._parse_disjunction() 4402 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4403 # This allows us to parse <non-identifier token> := <expr> 4404 this = exp.column( 4405 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4406 ) 4407 4408 while self._match_set(self.ASSIGNMENT): 4409 if isinstance(this, exp.Column) and len(this.parts) == 1: 4410 this = this.this 4411 4412 this = self.expression( 4413 self.ASSIGNMENT[self._prev.token_type], 4414 this=this, 4415 comments=self._prev_comments, 4416 expression=self._parse_assignment(), 4417 ) 4418 4419 return this 4420 4421 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4422 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4423 4424 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4425 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4426 4427 def _parse_equality(self) -> t.Optional[exp.Expression]: 4428 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4429 4430 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4431 return self._parse_tokens(self._parse_range, self.COMPARISON) 4432 4433 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4434 this = this or self._parse_bitwise() 4435 negate = self._match(TokenType.NOT) 4436 4437 if self._match_set(self.RANGE_PARSERS): 4438 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4439 if not expression: 4440 return this 4441 4442 this = expression 4443 elif self._match(TokenType.ISNULL): 4444 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4445 4446 # Postgres supports ISNULL and NOTNULL for conditions. 4447 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4448 if self._match(TokenType.NOTNULL): 4449 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4450 this = self.expression(exp.Not, this=this) 4451 4452 if negate: 4453 this = self._negate_range(this) 4454 4455 if self._match(TokenType.IS): 4456 this = self._parse_is(this) 4457 4458 return this 4459 4460 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4461 if not this: 4462 return this 4463 4464 return self.expression(exp.Not, this=this) 4465 4466 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4467 index = self._index - 1 4468 negate = self._match(TokenType.NOT) 4469 4470 if self._match_text_seq("DISTINCT", "FROM"): 4471 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4472 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4473 4474 if self._match(TokenType.JSON): 4475 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4476 4477 if self._match_text_seq("WITH"): 4478 _with = True 4479 elif self._match_text_seq("WITHOUT"): 4480 _with = False 4481 else: 4482 _with = None 4483 4484 unique = self._match(TokenType.UNIQUE) 4485 self._match_text_seq("KEYS") 4486 expression: t.Optional[exp.Expression] = self.expression( 4487 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4488 ) 4489 else: 4490 expression = self._parse_primary() or self._parse_null() 4491 if not expression: 4492 self._retreat(index) 4493 return None 4494 4495 this = self.expression(exp.Is, this=this, expression=expression) 4496 return self.expression(exp.Not, this=this) if negate else this 4497 4498 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4499 unnest = self._parse_unnest(with_alias=False) 4500 if unnest: 4501 this = self.expression(exp.In, this=this, unnest=unnest) 4502 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4503 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4504 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4505 4506 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4507 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4508 else: 4509 this = self.expression(exp.In, this=this, expressions=expressions) 4510 4511 if matched_l_paren: 4512 self._match_r_paren(this) 4513 elif not self._match(TokenType.R_BRACKET, expression=this): 4514 self.raise_error("Expecting ]") 4515 else: 4516 this = self.expression(exp.In, this=this, field=self._parse_column()) 4517 4518 return this 4519 4520 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4521 low = self._parse_bitwise() 4522 self._match(TokenType.AND) 4523 high = self._parse_bitwise() 4524 return self.expression(exp.Between, this=this, low=low, high=high) 4525 4526 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4527 if not self._match(TokenType.ESCAPE): 4528 return this 4529 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4530 4531 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4532 index = self._index 4533 4534 if not self._match(TokenType.INTERVAL) and match_interval: 4535 return None 4536 4537 if self._match(TokenType.STRING, advance=False): 4538 this = self._parse_primary() 4539 else: 4540 this = self._parse_term() 4541 4542 if not this or ( 4543 isinstance(this, exp.Column) 4544 and not this.table 4545 and not this.this.quoted 4546 and this.name.upper() == "IS" 4547 ): 4548 self._retreat(index) 4549 return None 4550 4551 unit = self._parse_function() or ( 4552 not self._match(TokenType.ALIAS, advance=False) 4553 and self._parse_var(any_token=True, upper=True) 4554 ) 4555 4556 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4557 # each INTERVAL expression into this canonical form so it's easy to transpile 4558 if this and this.is_number: 4559 this = exp.Literal.string(this.to_py()) 4560 elif this and this.is_string: 4561 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4562 if len(parts) == 1: 4563 if unit: 4564 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4565 self._retreat(self._index - 1) 4566 4567 this = exp.Literal.string(parts[0][0]) 4568 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4569 4570 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4571 unit = self.expression( 4572 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4573 ) 4574 4575 interval = self.expression(exp.Interval, this=this, unit=unit) 4576 4577 index = self._index 4578 self._match(TokenType.PLUS) 4579 4580 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4581 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4582 return self.expression( 4583 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4584 ) 4585 4586 self._retreat(index) 4587 return interval 4588 4589 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4590 this = self._parse_term() 4591 4592 while True: 4593 if self._match_set(self.BITWISE): 4594 this = self.expression( 4595 self.BITWISE[self._prev.token_type], 4596 this=this, 4597 expression=self._parse_term(), 4598 ) 4599 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4600 this = self.expression( 4601 exp.DPipe, 4602 this=this, 4603 expression=self._parse_term(), 4604 safe=not self.dialect.STRICT_STRING_CONCAT, 4605 ) 4606 elif self._match(TokenType.DQMARK): 4607 this = self.expression( 4608 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4609 ) 4610 elif self._match_pair(TokenType.LT, TokenType.LT): 4611 this = self.expression( 4612 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4613 ) 4614 elif self._match_pair(TokenType.GT, TokenType.GT): 4615 this = self.expression( 4616 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4617 ) 4618 else: 4619 break 4620 4621 return this 4622 4623 def _parse_term(self) -> t.Optional[exp.Expression]: 4624 this = self._parse_factor() 4625 4626 while self._match_set(self.TERM): 4627 klass = self.TERM[self._prev.token_type] 4628 comments = self._prev_comments 4629 expression = self._parse_factor() 4630 4631 this = self.expression(klass, this=this, comments=comments, expression=expression) 4632 4633 if isinstance(this, exp.Collate): 4634 expr = this.expression 4635 4636 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4637 # fallback to Identifier / Var 4638 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4639 ident = expr.this 4640 if isinstance(ident, exp.Identifier): 4641 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4642 4643 return this 4644 4645 def _parse_factor(self) -> t.Optional[exp.Expression]: 4646 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4647 this = parse_method() 4648 4649 while self._match_set(self.FACTOR): 4650 klass = self.FACTOR[self._prev.token_type] 4651 comments = self._prev_comments 4652 expression = parse_method() 4653 4654 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4655 self._retreat(self._index - 1) 4656 return this 4657 4658 this = self.expression(klass, this=this, comments=comments, expression=expression) 4659 4660 if isinstance(this, exp.Div): 4661 this.args["typed"] = self.dialect.TYPED_DIVISION 4662 this.args["safe"] = self.dialect.SAFE_DIVISION 4663 4664 return this 4665 4666 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4667 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4668 4669 def _parse_unary(self) -> t.Optional[exp.Expression]: 4670 if self._match_set(self.UNARY_PARSERS): 4671 return self.UNARY_PARSERS[self._prev.token_type](self) 4672 return self._parse_at_time_zone(self._parse_type()) 4673 4674 def _parse_type( 4675 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4676 ) -> t.Optional[exp.Expression]: 4677 interval = parse_interval and self._parse_interval() 4678 if interval: 4679 return interval 4680 4681 index = self._index 4682 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4683 4684 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4685 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4686 if isinstance(data_type, exp.Cast): 4687 # This constructor can contain ops directly after it, for instance struct unnesting: 4688 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4689 return self._parse_column_ops(data_type) 4690 4691 if data_type: 4692 index2 = self._index 4693 this = self._parse_primary() 4694 4695 if isinstance(this, exp.Literal): 4696 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4697 if parser: 4698 return parser(self, this, data_type) 4699 4700 return self.expression(exp.Cast, this=this, to=data_type) 4701 4702 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4703 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4704 # 4705 # If the index difference here is greater than 1, that means the parser itself must have 4706 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4707 # 4708 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4709 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4710 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4711 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4712 # 4713 # In these cases, we don't really want to return the converted type, but instead retreat 4714 # and try to parse a Column or Identifier in the section below. 4715 if data_type.expressions and index2 - index > 1: 4716 self._retreat(index2) 4717 return self._parse_column_ops(data_type) 4718 4719 self._retreat(index) 4720 4721 if fallback_to_identifier: 4722 return self._parse_id_var() 4723 4724 this = self._parse_column() 4725 return this and self._parse_column_ops(this) 4726 4727 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4728 this = self._parse_type() 4729 if not this: 4730 return None 4731 4732 if isinstance(this, exp.Column) and not this.table: 4733 this = exp.var(this.name.upper()) 4734 4735 return self.expression( 4736 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4737 ) 4738 4739 def _parse_types( 4740 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4741 ) -> t.Optional[exp.Expression]: 4742 index = self._index 4743 4744 this: t.Optional[exp.Expression] = None 4745 prefix = self._match_text_seq("SYSUDTLIB", ".") 4746 4747 if not self._match_set(self.TYPE_TOKENS): 4748 identifier = allow_identifiers and self._parse_id_var( 4749 any_token=False, tokens=(TokenType.VAR,) 4750 ) 4751 if isinstance(identifier, exp.Identifier): 4752 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4753 4754 if len(tokens) != 1: 4755 self.raise_error("Unexpected identifier", self._prev) 4756 4757 if tokens[0].token_type in self.TYPE_TOKENS: 4758 self._prev = tokens[0] 4759 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4760 type_name = identifier.name 4761 4762 while self._match(TokenType.DOT): 4763 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4764 4765 this = exp.DataType.build(type_name, udt=True) 4766 else: 4767 self._retreat(self._index - 1) 4768 return None 4769 else: 4770 return None 4771 4772 type_token = self._prev.token_type 4773 4774 if type_token == TokenType.PSEUDO_TYPE: 4775 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4776 4777 if type_token == TokenType.OBJECT_IDENTIFIER: 4778 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4779 4780 # https://materialize.com/docs/sql/types/map/ 4781 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4782 key_type = self._parse_types( 4783 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4784 ) 4785 if not self._match(TokenType.FARROW): 4786 self._retreat(index) 4787 return None 4788 4789 value_type = self._parse_types( 4790 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4791 ) 4792 if not self._match(TokenType.R_BRACKET): 4793 self._retreat(index) 4794 return None 4795 4796 return exp.DataType( 4797 this=exp.DataType.Type.MAP, 4798 expressions=[key_type, value_type], 4799 nested=True, 4800 prefix=prefix, 4801 ) 4802 4803 nested = type_token in self.NESTED_TYPE_TOKENS 4804 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4805 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4806 expressions = None 4807 maybe_func = False 4808 4809 if self._match(TokenType.L_PAREN): 4810 if is_struct: 4811 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4812 elif nested: 4813 expressions = self._parse_csv( 4814 lambda: self._parse_types( 4815 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4816 ) 4817 ) 4818 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4819 this = expressions[0] 4820 this.set("nullable", True) 4821 self._match_r_paren() 4822 return this 4823 elif type_token in self.ENUM_TYPE_TOKENS: 4824 expressions = self._parse_csv(self._parse_equality) 4825 elif is_aggregate: 4826 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4827 any_token=False, tokens=(TokenType.VAR,) 4828 ) 4829 if not func_or_ident or not self._match(TokenType.COMMA): 4830 return None 4831 expressions = self._parse_csv( 4832 lambda: self._parse_types( 4833 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4834 ) 4835 ) 4836 expressions.insert(0, func_or_ident) 4837 else: 4838 expressions = self._parse_csv(self._parse_type_size) 4839 4840 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4841 if type_token == TokenType.VECTOR and len(expressions) == 2: 4842 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4843 4844 if not expressions or not self._match(TokenType.R_PAREN): 4845 self._retreat(index) 4846 return None 4847 4848 maybe_func = True 4849 4850 values: t.Optional[t.List[exp.Expression]] = None 4851 4852 if nested and self._match(TokenType.LT): 4853 if is_struct: 4854 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4855 else: 4856 expressions = self._parse_csv( 4857 lambda: self._parse_types( 4858 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4859 ) 4860 ) 4861 4862 if not self._match(TokenType.GT): 4863 self.raise_error("Expecting >") 4864 4865 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4866 values = self._parse_csv(self._parse_assignment) 4867 if not values and is_struct: 4868 values = None 4869 self._retreat(self._index - 1) 4870 else: 4871 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4872 4873 if type_token in self.TIMESTAMPS: 4874 if self._match_text_seq("WITH", "TIME", "ZONE"): 4875 maybe_func = False 4876 tz_type = ( 4877 exp.DataType.Type.TIMETZ 4878 if type_token in self.TIMES 4879 else exp.DataType.Type.TIMESTAMPTZ 4880 ) 4881 this = exp.DataType(this=tz_type, expressions=expressions) 4882 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4883 maybe_func = False 4884 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4885 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4886 maybe_func = False 4887 elif type_token == TokenType.INTERVAL: 4888 unit = self._parse_var(upper=True) 4889 if unit: 4890 if self._match_text_seq("TO"): 4891 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4892 4893 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4894 else: 4895 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4896 4897 if maybe_func and check_func: 4898 index2 = self._index 4899 peek = self._parse_string() 4900 4901 if not peek: 4902 self._retreat(index) 4903 return None 4904 4905 self._retreat(index2) 4906 4907 if not this: 4908 if self._match_text_seq("UNSIGNED"): 4909 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4910 if not unsigned_type_token: 4911 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4912 4913 type_token = unsigned_type_token or type_token 4914 4915 this = exp.DataType( 4916 this=exp.DataType.Type[type_token.value], 4917 expressions=expressions, 4918 nested=nested, 4919 prefix=prefix, 4920 ) 4921 4922 # Empty arrays/structs are allowed 4923 if values is not None: 4924 cls = exp.Struct if is_struct else exp.Array 4925 this = exp.cast(cls(expressions=values), this, copy=False) 4926 4927 elif expressions: 4928 this.set("expressions", expressions) 4929 4930 # https://materialize.com/docs/sql/types/list/#type-name 4931 while self._match(TokenType.LIST): 4932 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4933 4934 index = self._index 4935 4936 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4937 matched_array = self._match(TokenType.ARRAY) 4938 4939 while self._curr: 4940 datatype_token = self._prev.token_type 4941 matched_l_bracket = self._match(TokenType.L_BRACKET) 4942 if not matched_l_bracket and not matched_array: 4943 break 4944 4945 matched_array = False 4946 values = self._parse_csv(self._parse_assignment) or None 4947 if ( 4948 values 4949 and not schema 4950 and ( 4951 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4952 ) 4953 ): 4954 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4955 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4956 self._retreat(index) 4957 break 4958 4959 this = exp.DataType( 4960 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4961 ) 4962 self._match(TokenType.R_BRACKET) 4963 4964 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4965 converter = self.TYPE_CONVERTERS.get(this.this) 4966 if converter: 4967 this = converter(t.cast(exp.DataType, this)) 4968 4969 return this 4970 4971 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4972 index = self._index 4973 4974 if ( 4975 self._curr 4976 and self._next 4977 and self._curr.token_type in self.TYPE_TOKENS 4978 and self._next.token_type in self.TYPE_TOKENS 4979 ): 4980 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4981 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4982 this = self._parse_id_var() 4983 else: 4984 this = ( 4985 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4986 or self._parse_id_var() 4987 ) 4988 4989 self._match(TokenType.COLON) 4990 4991 if ( 4992 type_required 4993 and not isinstance(this, exp.DataType) 4994 and not self._match_set(self.TYPE_TOKENS, advance=False) 4995 ): 4996 self._retreat(index) 4997 return self._parse_types() 4998 4999 return self._parse_column_def(this) 5000 5001 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5002 if not self._match_text_seq("AT", "TIME", "ZONE"): 5003 return this 5004 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5005 5006 def _parse_column(self) -> t.Optional[exp.Expression]: 5007 this = self._parse_column_reference() 5008 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5009 5010 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5011 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5012 5013 return column 5014 5015 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5016 this = self._parse_field() 5017 if ( 5018 not this 5019 and self._match(TokenType.VALUES, advance=False) 5020 and self.VALUES_FOLLOWED_BY_PAREN 5021 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5022 ): 5023 this = self._parse_id_var() 5024 5025 if isinstance(this, exp.Identifier): 5026 # We bubble up comments from the Identifier to the Column 5027 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5028 5029 return this 5030 5031 def _parse_colon_as_variant_extract( 5032 self, this: t.Optional[exp.Expression] 5033 ) -> t.Optional[exp.Expression]: 5034 casts = [] 5035 json_path = [] 5036 escape = None 5037 5038 while self._match(TokenType.COLON): 5039 start_index = self._index 5040 5041 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5042 path = self._parse_column_ops( 5043 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5044 ) 5045 5046 # The cast :: operator has a lower precedence than the extraction operator :, so 5047 # we rearrange the AST appropriately to avoid casting the JSON path 5048 while isinstance(path, exp.Cast): 5049 casts.append(path.to) 5050 path = path.this 5051 5052 if casts: 5053 dcolon_offset = next( 5054 i 5055 for i, t in enumerate(self._tokens[start_index:]) 5056 if t.token_type == TokenType.DCOLON 5057 ) 5058 end_token = self._tokens[start_index + dcolon_offset - 1] 5059 else: 5060 end_token = self._prev 5061 5062 if path: 5063 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5064 # it'll roundtrip to a string literal in GET_PATH 5065 if isinstance(path, exp.Identifier) and path.quoted: 5066 escape = True 5067 5068 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5069 5070 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5071 # Databricks transforms it back to the colon/dot notation 5072 if json_path: 5073 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5074 5075 if json_path_expr: 5076 json_path_expr.set("escape", escape) 5077 5078 this = self.expression( 5079 exp.JSONExtract, 5080 this=this, 5081 expression=json_path_expr, 5082 variant_extract=True, 5083 ) 5084 5085 while casts: 5086 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5087 5088 return this 5089 5090 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5091 return self._parse_types() 5092 5093 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5094 this = self._parse_bracket(this) 5095 5096 while self._match_set(self.COLUMN_OPERATORS): 5097 op_token = self._prev.token_type 5098 op = self.COLUMN_OPERATORS.get(op_token) 5099 5100 if op_token == TokenType.DCOLON: 5101 field = self._parse_dcolon() 5102 if not field: 5103 self.raise_error("Expected type") 5104 elif op and self._curr: 5105 field = self._parse_column_reference() or self._parse_bracket() 5106 else: 5107 field = self._parse_field(any_token=True, anonymous_func=True) 5108 5109 if isinstance(field, exp.Func) and this: 5110 # bigquery allows function calls like x.y.count(...) 5111 # SAFE.SUBSTR(...) 5112 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5113 this = exp.replace_tree( 5114 this, 5115 lambda n: ( 5116 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5117 if n.table 5118 else n.this 5119 ) 5120 if isinstance(n, exp.Column) 5121 else n, 5122 ) 5123 5124 if op: 5125 this = op(self, this, field) 5126 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5127 this = self.expression( 5128 exp.Column, 5129 comments=this.comments, 5130 this=field, 5131 table=this.this, 5132 db=this.args.get("table"), 5133 catalog=this.args.get("db"), 5134 ) 5135 else: 5136 this = self.expression(exp.Dot, this=this, expression=field) 5137 5138 this = self._parse_bracket(this) 5139 5140 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5141 5142 def _parse_primary(self) -> t.Optional[exp.Expression]: 5143 if self._match_set(self.PRIMARY_PARSERS): 5144 token_type = self._prev.token_type 5145 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5146 5147 if token_type == TokenType.STRING: 5148 expressions = [primary] 5149 while self._match(TokenType.STRING): 5150 expressions.append(exp.Literal.string(self._prev.text)) 5151 5152 if len(expressions) > 1: 5153 return self.expression(exp.Concat, expressions=expressions) 5154 5155 return primary 5156 5157 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5158 return exp.Literal.number(f"0.{self._prev.text}") 5159 5160 if self._match(TokenType.L_PAREN): 5161 comments = self._prev_comments 5162 query = self._parse_select() 5163 5164 if query: 5165 expressions = [query] 5166 else: 5167 expressions = self._parse_expressions() 5168 5169 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5170 5171 if not this and self._match(TokenType.R_PAREN, advance=False): 5172 this = self.expression(exp.Tuple) 5173 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5174 this = self._parse_subquery(this=this, parse_alias=False) 5175 elif isinstance(this, exp.Subquery): 5176 this = self._parse_subquery( 5177 this=self._parse_set_operations(this), parse_alias=False 5178 ) 5179 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5180 this = self.expression(exp.Tuple, expressions=expressions) 5181 else: 5182 this = self.expression(exp.Paren, this=this) 5183 5184 if this: 5185 this.add_comments(comments) 5186 5187 self._match_r_paren(expression=this) 5188 return this 5189 5190 return None 5191 5192 def _parse_field( 5193 self, 5194 any_token: bool = False, 5195 tokens: t.Optional[t.Collection[TokenType]] = None, 5196 anonymous_func: bool = False, 5197 ) -> t.Optional[exp.Expression]: 5198 if anonymous_func: 5199 field = ( 5200 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5201 or self._parse_primary() 5202 ) 5203 else: 5204 field = self._parse_primary() or self._parse_function( 5205 anonymous=anonymous_func, any_token=any_token 5206 ) 5207 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5208 5209 def _parse_function( 5210 self, 5211 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5212 anonymous: bool = False, 5213 optional_parens: bool = True, 5214 any_token: bool = False, 5215 ) -> t.Optional[exp.Expression]: 5216 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5217 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5218 fn_syntax = False 5219 if ( 5220 self._match(TokenType.L_BRACE, advance=False) 5221 and self._next 5222 and self._next.text.upper() == "FN" 5223 ): 5224 self._advance(2) 5225 fn_syntax = True 5226 5227 func = self._parse_function_call( 5228 functions=functions, 5229 anonymous=anonymous, 5230 optional_parens=optional_parens, 5231 any_token=any_token, 5232 ) 5233 5234 if fn_syntax: 5235 self._match(TokenType.R_BRACE) 5236 5237 return func 5238 5239 def _parse_function_call( 5240 self, 5241 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5242 anonymous: bool = False, 5243 optional_parens: bool = True, 5244 any_token: bool = False, 5245 ) -> t.Optional[exp.Expression]: 5246 if not self._curr: 5247 return None 5248 5249 comments = self._curr.comments 5250 token_type = self._curr.token_type 5251 this = self._curr.text 5252 upper = this.upper() 5253 5254 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5255 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5256 self._advance() 5257 return self._parse_window(parser(self)) 5258 5259 if not self._next or self._next.token_type != TokenType.L_PAREN: 5260 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5261 self._advance() 5262 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5263 5264 return None 5265 5266 if any_token: 5267 if token_type in self.RESERVED_TOKENS: 5268 return None 5269 elif token_type not in self.FUNC_TOKENS: 5270 return None 5271 5272 self._advance(2) 5273 5274 parser = self.FUNCTION_PARSERS.get(upper) 5275 if parser and not anonymous: 5276 this = parser(self) 5277 else: 5278 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5279 5280 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5281 this = self.expression( 5282 subquery_predicate, comments=comments, this=self._parse_select() 5283 ) 5284 self._match_r_paren() 5285 return this 5286 5287 if functions is None: 5288 functions = self.FUNCTIONS 5289 5290 function = functions.get(upper) 5291 5292 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5293 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5294 5295 if alias: 5296 args = self._kv_to_prop_eq(args) 5297 5298 if function and not anonymous: 5299 if "dialect" in function.__code__.co_varnames: 5300 func = function(args, dialect=self.dialect) 5301 else: 5302 func = function(args) 5303 5304 func = self.validate_expression(func, args) 5305 if not self.dialect.NORMALIZE_FUNCTIONS: 5306 func.meta["name"] = this 5307 5308 this = func 5309 else: 5310 if token_type == TokenType.IDENTIFIER: 5311 this = exp.Identifier(this=this, quoted=True) 5312 this = self.expression(exp.Anonymous, this=this, expressions=args) 5313 5314 if isinstance(this, exp.Expression): 5315 this.add_comments(comments) 5316 5317 self._match_r_paren(this) 5318 return self._parse_window(this) 5319 5320 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5321 return expression 5322 5323 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5324 transformed = [] 5325 5326 for index, e in enumerate(expressions): 5327 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5328 if isinstance(e, exp.Alias): 5329 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5330 5331 if not isinstance(e, exp.PropertyEQ): 5332 e = self.expression( 5333 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5334 ) 5335 5336 if isinstance(e.this, exp.Column): 5337 e.this.replace(e.this.this) 5338 else: 5339 e = self._to_prop_eq(e, index) 5340 5341 transformed.append(e) 5342 5343 return transformed 5344 5345 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5346 return self._parse_statement() 5347 5348 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5349 return self._parse_column_def(self._parse_id_var()) 5350 5351 def _parse_user_defined_function( 5352 self, kind: t.Optional[TokenType] = None 5353 ) -> t.Optional[exp.Expression]: 5354 this = self._parse_id_var() 5355 5356 while self._match(TokenType.DOT): 5357 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5358 5359 if not self._match(TokenType.L_PAREN): 5360 return this 5361 5362 expressions = self._parse_csv(self._parse_function_parameter) 5363 self._match_r_paren() 5364 return self.expression( 5365 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5366 ) 5367 5368 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5369 literal = self._parse_primary() 5370 if literal: 5371 return self.expression(exp.Introducer, this=token.text, expression=literal) 5372 5373 return self.expression(exp.Identifier, this=token.text) 5374 5375 def _parse_session_parameter(self) -> exp.SessionParameter: 5376 kind = None 5377 this = self._parse_id_var() or self._parse_primary() 5378 5379 if this and self._match(TokenType.DOT): 5380 kind = this.name 5381 this = self._parse_var() or self._parse_primary() 5382 5383 return self.expression(exp.SessionParameter, this=this, kind=kind) 5384 5385 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5386 return self._parse_id_var() 5387 5388 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5389 index = self._index 5390 5391 if self._match(TokenType.L_PAREN): 5392 expressions = t.cast( 5393 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5394 ) 5395 5396 if not self._match(TokenType.R_PAREN): 5397 self._retreat(index) 5398 else: 5399 expressions = [self._parse_lambda_arg()] 5400 5401 if self._match_set(self.LAMBDAS): 5402 return self.LAMBDAS[self._prev.token_type](self, expressions) 5403 5404 self._retreat(index) 5405 5406 this: t.Optional[exp.Expression] 5407 5408 if self._match(TokenType.DISTINCT): 5409 this = self.expression( 5410 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5411 ) 5412 else: 5413 this = self._parse_select_or_expression(alias=alias) 5414 5415 return self._parse_limit( 5416 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5417 ) 5418 5419 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5420 index = self._index 5421 if not self._match(TokenType.L_PAREN): 5422 return this 5423 5424 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5425 # expr can be of both types 5426 if self._match_set(self.SELECT_START_TOKENS): 5427 self._retreat(index) 5428 return this 5429 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5430 self._match_r_paren() 5431 return self.expression(exp.Schema, this=this, expressions=args) 5432 5433 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5434 return self._parse_column_def(self._parse_field(any_token=True)) 5435 5436 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5437 # column defs are not really columns, they're identifiers 5438 if isinstance(this, exp.Column): 5439 this = this.this 5440 5441 kind = self._parse_types(schema=True) 5442 5443 if self._match_text_seq("FOR", "ORDINALITY"): 5444 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5445 5446 constraints: t.List[exp.Expression] = [] 5447 5448 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5449 ("ALIAS", "MATERIALIZED") 5450 ): 5451 persisted = self._prev.text.upper() == "MATERIALIZED" 5452 constraint_kind = exp.ComputedColumnConstraint( 5453 this=self._parse_assignment(), 5454 persisted=persisted or self._match_text_seq("PERSISTED"), 5455 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5456 ) 5457 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5458 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5459 self._match(TokenType.ALIAS) 5460 constraints.append( 5461 self.expression( 5462 exp.ColumnConstraint, 5463 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5464 ) 5465 ) 5466 5467 while True: 5468 constraint = self._parse_column_constraint() 5469 if not constraint: 5470 break 5471 constraints.append(constraint) 5472 5473 if not kind and not constraints: 5474 return this 5475 5476 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5477 5478 def _parse_auto_increment( 5479 self, 5480 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5481 start = None 5482 increment = None 5483 5484 if self._match(TokenType.L_PAREN, advance=False): 5485 args = self._parse_wrapped_csv(self._parse_bitwise) 5486 start = seq_get(args, 0) 5487 increment = seq_get(args, 1) 5488 elif self._match_text_seq("START"): 5489 start = self._parse_bitwise() 5490 self._match_text_seq("INCREMENT") 5491 increment = self._parse_bitwise() 5492 5493 if start and increment: 5494 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5495 5496 return exp.AutoIncrementColumnConstraint() 5497 5498 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5499 if not self._match_text_seq("REFRESH"): 5500 self._retreat(self._index - 1) 5501 return None 5502 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5503 5504 def _parse_compress(self) -> exp.CompressColumnConstraint: 5505 if self._match(TokenType.L_PAREN, advance=False): 5506 return self.expression( 5507 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5508 ) 5509 5510 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5511 5512 def _parse_generated_as_identity( 5513 self, 5514 ) -> ( 5515 exp.GeneratedAsIdentityColumnConstraint 5516 | exp.ComputedColumnConstraint 5517 | exp.GeneratedAsRowColumnConstraint 5518 ): 5519 if self._match_text_seq("BY", "DEFAULT"): 5520 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5521 this = self.expression( 5522 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5523 ) 5524 else: 5525 self._match_text_seq("ALWAYS") 5526 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5527 5528 self._match(TokenType.ALIAS) 5529 5530 if self._match_text_seq("ROW"): 5531 start = self._match_text_seq("START") 5532 if not start: 5533 self._match(TokenType.END) 5534 hidden = self._match_text_seq("HIDDEN") 5535 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5536 5537 identity = self._match_text_seq("IDENTITY") 5538 5539 if self._match(TokenType.L_PAREN): 5540 if self._match(TokenType.START_WITH): 5541 this.set("start", self._parse_bitwise()) 5542 if self._match_text_seq("INCREMENT", "BY"): 5543 this.set("increment", self._parse_bitwise()) 5544 if self._match_text_seq("MINVALUE"): 5545 this.set("minvalue", self._parse_bitwise()) 5546 if self._match_text_seq("MAXVALUE"): 5547 this.set("maxvalue", self._parse_bitwise()) 5548 5549 if self._match_text_seq("CYCLE"): 5550 this.set("cycle", True) 5551 elif self._match_text_seq("NO", "CYCLE"): 5552 this.set("cycle", False) 5553 5554 if not identity: 5555 this.set("expression", self._parse_range()) 5556 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5557 args = self._parse_csv(self._parse_bitwise) 5558 this.set("start", seq_get(args, 0)) 5559 this.set("increment", seq_get(args, 1)) 5560 5561 self._match_r_paren() 5562 5563 return this 5564 5565 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5566 self._match_text_seq("LENGTH") 5567 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5568 5569 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5570 if self._match_text_seq("NULL"): 5571 return self.expression(exp.NotNullColumnConstraint) 5572 if self._match_text_seq("CASESPECIFIC"): 5573 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5574 if self._match_text_seq("FOR", "REPLICATION"): 5575 return self.expression(exp.NotForReplicationColumnConstraint) 5576 5577 # Unconsume the `NOT` token 5578 self._retreat(self._index - 1) 5579 return None 5580 5581 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5582 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5583 5584 procedure_option_follows = ( 5585 self._match(TokenType.WITH, advance=False) 5586 and self._next 5587 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5588 ) 5589 5590 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5591 return self.expression( 5592 exp.ColumnConstraint, 5593 this=this, 5594 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5595 ) 5596 5597 return this 5598 5599 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5600 if not self._match(TokenType.CONSTRAINT): 5601 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5602 5603 return self.expression( 5604 exp.Constraint, 5605 this=self._parse_id_var(), 5606 expressions=self._parse_unnamed_constraints(), 5607 ) 5608 5609 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5610 constraints = [] 5611 while True: 5612 constraint = self._parse_unnamed_constraint() or self._parse_function() 5613 if not constraint: 5614 break 5615 constraints.append(constraint) 5616 5617 return constraints 5618 5619 def _parse_unnamed_constraint( 5620 self, constraints: t.Optional[t.Collection[str]] = None 5621 ) -> t.Optional[exp.Expression]: 5622 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5623 constraints or self.CONSTRAINT_PARSERS 5624 ): 5625 return None 5626 5627 constraint = self._prev.text.upper() 5628 if constraint not in self.CONSTRAINT_PARSERS: 5629 self.raise_error(f"No parser found for schema constraint {constraint}.") 5630 5631 return self.CONSTRAINT_PARSERS[constraint](self) 5632 5633 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5634 return self._parse_id_var(any_token=False) 5635 5636 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5637 self._match_text_seq("KEY") 5638 return self.expression( 5639 exp.UniqueColumnConstraint, 5640 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5641 this=self._parse_schema(self._parse_unique_key()), 5642 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5643 on_conflict=self._parse_on_conflict(), 5644 ) 5645 5646 def _parse_key_constraint_options(self) -> t.List[str]: 5647 options = [] 5648 while True: 5649 if not self._curr: 5650 break 5651 5652 if self._match(TokenType.ON): 5653 action = None 5654 on = self._advance_any() and self._prev.text 5655 5656 if self._match_text_seq("NO", "ACTION"): 5657 action = "NO ACTION" 5658 elif self._match_text_seq("CASCADE"): 5659 action = "CASCADE" 5660 elif self._match_text_seq("RESTRICT"): 5661 action = "RESTRICT" 5662 elif self._match_pair(TokenType.SET, TokenType.NULL): 5663 action = "SET NULL" 5664 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5665 action = "SET DEFAULT" 5666 else: 5667 self.raise_error("Invalid key constraint") 5668 5669 options.append(f"ON {on} {action}") 5670 else: 5671 var = self._parse_var_from_options( 5672 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5673 ) 5674 if not var: 5675 break 5676 options.append(var.name) 5677 5678 return options 5679 5680 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5681 if match and not self._match(TokenType.REFERENCES): 5682 return None 5683 5684 expressions = None 5685 this = self._parse_table(schema=True) 5686 options = self._parse_key_constraint_options() 5687 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5688 5689 def _parse_foreign_key(self) -> exp.ForeignKey: 5690 expressions = self._parse_wrapped_id_vars() 5691 reference = self._parse_references() 5692 options = {} 5693 5694 while self._match(TokenType.ON): 5695 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5696 self.raise_error("Expected DELETE or UPDATE") 5697 5698 kind = self._prev.text.lower() 5699 5700 if self._match_text_seq("NO", "ACTION"): 5701 action = "NO ACTION" 5702 elif self._match(TokenType.SET): 5703 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5704 action = "SET " + self._prev.text.upper() 5705 else: 5706 self._advance() 5707 action = self._prev.text.upper() 5708 5709 options[kind] = action 5710 5711 return self.expression( 5712 exp.ForeignKey, 5713 expressions=expressions, 5714 reference=reference, 5715 **options, # type: ignore 5716 ) 5717 5718 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5719 return self._parse_field() 5720 5721 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5722 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5723 self._retreat(self._index - 1) 5724 return None 5725 5726 id_vars = self._parse_wrapped_id_vars() 5727 return self.expression( 5728 exp.PeriodForSystemTimeConstraint, 5729 this=seq_get(id_vars, 0), 5730 expression=seq_get(id_vars, 1), 5731 ) 5732 5733 def _parse_primary_key( 5734 self, wrapped_optional: bool = False, in_props: bool = False 5735 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5736 desc = ( 5737 self._match_set((TokenType.ASC, TokenType.DESC)) 5738 and self._prev.token_type == TokenType.DESC 5739 ) 5740 5741 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5742 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5743 5744 expressions = self._parse_wrapped_csv( 5745 self._parse_primary_key_part, optional=wrapped_optional 5746 ) 5747 options = self._parse_key_constraint_options() 5748 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5749 5750 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5751 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5752 5753 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5754 """ 5755 Parses a datetime column in ODBC format. We parse the column into the corresponding 5756 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5757 same as we did for `DATE('yyyy-mm-dd')`. 5758 5759 Reference: 5760 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5761 """ 5762 self._match(TokenType.VAR) 5763 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5764 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5765 if not self._match(TokenType.R_BRACE): 5766 self.raise_error("Expected }") 5767 return expression 5768 5769 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5770 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5771 return this 5772 5773 bracket_kind = self._prev.token_type 5774 if ( 5775 bracket_kind == TokenType.L_BRACE 5776 and self._curr 5777 and self._curr.token_type == TokenType.VAR 5778 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5779 ): 5780 return self._parse_odbc_datetime_literal() 5781 5782 expressions = self._parse_csv( 5783 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5784 ) 5785 5786 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5787 self.raise_error("Expected ]") 5788 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5789 self.raise_error("Expected }") 5790 5791 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5792 if bracket_kind == TokenType.L_BRACE: 5793 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5794 elif not this: 5795 this = build_array_constructor( 5796 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5797 ) 5798 else: 5799 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5800 if constructor_type: 5801 return build_array_constructor( 5802 constructor_type, 5803 args=expressions, 5804 bracket_kind=bracket_kind, 5805 dialect=self.dialect, 5806 ) 5807 5808 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5809 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5810 5811 self._add_comments(this) 5812 return self._parse_bracket(this) 5813 5814 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5815 if self._match(TokenType.COLON): 5816 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5817 return this 5818 5819 def _parse_case(self) -> t.Optional[exp.Expression]: 5820 ifs = [] 5821 default = None 5822 5823 comments = self._prev_comments 5824 expression = self._parse_assignment() 5825 5826 while self._match(TokenType.WHEN): 5827 this = self._parse_assignment() 5828 self._match(TokenType.THEN) 5829 then = self._parse_assignment() 5830 ifs.append(self.expression(exp.If, this=this, true=then)) 5831 5832 if self._match(TokenType.ELSE): 5833 default = self._parse_assignment() 5834 5835 if not self._match(TokenType.END): 5836 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5837 default = exp.column("interval") 5838 else: 5839 self.raise_error("Expected END after CASE", self._prev) 5840 5841 return self.expression( 5842 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5843 ) 5844 5845 def _parse_if(self) -> t.Optional[exp.Expression]: 5846 if self._match(TokenType.L_PAREN): 5847 args = self._parse_csv(self._parse_assignment) 5848 this = self.validate_expression(exp.If.from_arg_list(args), args) 5849 self._match_r_paren() 5850 else: 5851 index = self._index - 1 5852 5853 if self.NO_PAREN_IF_COMMANDS and index == 0: 5854 return self._parse_as_command(self._prev) 5855 5856 condition = self._parse_assignment() 5857 5858 if not condition: 5859 self._retreat(index) 5860 return None 5861 5862 self._match(TokenType.THEN) 5863 true = self._parse_assignment() 5864 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5865 self._match(TokenType.END) 5866 this = self.expression(exp.If, this=condition, true=true, false=false) 5867 5868 return this 5869 5870 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5871 if not self._match_text_seq("VALUE", "FOR"): 5872 self._retreat(self._index - 1) 5873 return None 5874 5875 return self.expression( 5876 exp.NextValueFor, 5877 this=self._parse_column(), 5878 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5879 ) 5880 5881 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5882 this = self._parse_function() or self._parse_var_or_string(upper=True) 5883 5884 if self._match(TokenType.FROM): 5885 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5886 5887 if not self._match(TokenType.COMMA): 5888 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5889 5890 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5891 5892 def _parse_gap_fill(self) -> exp.GapFill: 5893 self._match(TokenType.TABLE) 5894 this = self._parse_table() 5895 5896 self._match(TokenType.COMMA) 5897 args = [this, *self._parse_csv(self._parse_lambda)] 5898 5899 gap_fill = exp.GapFill.from_arg_list(args) 5900 return self.validate_expression(gap_fill, args) 5901 5902 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5903 this = self._parse_assignment() 5904 5905 if not self._match(TokenType.ALIAS): 5906 if self._match(TokenType.COMMA): 5907 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5908 5909 self.raise_error("Expected AS after CAST") 5910 5911 fmt = None 5912 to = self._parse_types() 5913 5914 if self._match(TokenType.FORMAT): 5915 fmt_string = self._parse_string() 5916 fmt = self._parse_at_time_zone(fmt_string) 5917 5918 if not to: 5919 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5920 if to.this in exp.DataType.TEMPORAL_TYPES: 5921 this = self.expression( 5922 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5923 this=this, 5924 format=exp.Literal.string( 5925 format_time( 5926 fmt_string.this if fmt_string else "", 5927 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5928 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5929 ) 5930 ), 5931 safe=safe, 5932 ) 5933 5934 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5935 this.set("zone", fmt.args["zone"]) 5936 return this 5937 elif not to: 5938 self.raise_error("Expected TYPE after CAST") 5939 elif isinstance(to, exp.Identifier): 5940 to = exp.DataType.build(to.name, udt=True) 5941 elif to.this == exp.DataType.Type.CHAR: 5942 if self._match(TokenType.CHARACTER_SET): 5943 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5944 5945 return self.expression( 5946 exp.Cast if strict else exp.TryCast, 5947 this=this, 5948 to=to, 5949 format=fmt, 5950 safe=safe, 5951 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5952 ) 5953 5954 def _parse_string_agg(self) -> exp.Expression: 5955 if self._match(TokenType.DISTINCT): 5956 args: t.List[t.Optional[exp.Expression]] = [ 5957 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5958 ] 5959 if self._match(TokenType.COMMA): 5960 args.extend(self._parse_csv(self._parse_assignment)) 5961 else: 5962 args = self._parse_csv(self._parse_assignment) # type: ignore 5963 5964 index = self._index 5965 if not self._match(TokenType.R_PAREN) and args: 5966 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5967 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5968 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5969 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5970 5971 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5972 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5973 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5974 if not self._match_text_seq("WITHIN", "GROUP"): 5975 self._retreat(index) 5976 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5977 5978 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5979 order = self._parse_order(this=seq_get(args, 0)) 5980 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5981 5982 def _parse_convert( 5983 self, strict: bool, safe: t.Optional[bool] = None 5984 ) -> t.Optional[exp.Expression]: 5985 this = self._parse_bitwise() 5986 5987 if self._match(TokenType.USING): 5988 to: t.Optional[exp.Expression] = self.expression( 5989 exp.CharacterSet, this=self._parse_var() 5990 ) 5991 elif self._match(TokenType.COMMA): 5992 to = self._parse_types() 5993 else: 5994 to = None 5995 5996 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5997 5998 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5999 """ 6000 There are generally two variants of the DECODE function: 6001 6002 - DECODE(bin, charset) 6003 - DECODE(expression, search, result [, search, result] ... [, default]) 6004 6005 The second variant will always be parsed into a CASE expression. Note that NULL 6006 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6007 instead of relying on pattern matching. 6008 """ 6009 args = self._parse_csv(self._parse_assignment) 6010 6011 if len(args) < 3: 6012 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6013 6014 expression, *expressions = args 6015 if not expression: 6016 return None 6017 6018 ifs = [] 6019 for search, result in zip(expressions[::2], expressions[1::2]): 6020 if not search or not result: 6021 return None 6022 6023 if isinstance(search, exp.Literal): 6024 ifs.append( 6025 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6026 ) 6027 elif isinstance(search, exp.Null): 6028 ifs.append( 6029 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6030 ) 6031 else: 6032 cond = exp.or_( 6033 exp.EQ(this=expression.copy(), expression=search), 6034 exp.and_( 6035 exp.Is(this=expression.copy(), expression=exp.Null()), 6036 exp.Is(this=search.copy(), expression=exp.Null()), 6037 copy=False, 6038 ), 6039 copy=False, 6040 ) 6041 ifs.append(exp.If(this=cond, true=result)) 6042 6043 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6044 6045 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6046 self._match_text_seq("KEY") 6047 key = self._parse_column() 6048 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6049 self._match_text_seq("VALUE") 6050 value = self._parse_bitwise() 6051 6052 if not key and not value: 6053 return None 6054 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6055 6056 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6057 if not this or not self._match_text_seq("FORMAT", "JSON"): 6058 return this 6059 6060 return self.expression(exp.FormatJson, this=this) 6061 6062 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6063 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6064 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6065 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6066 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6067 else: 6068 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6069 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6070 6071 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6072 6073 if not empty and not error and not null: 6074 return None 6075 6076 return self.expression( 6077 exp.OnCondition, 6078 empty=empty, 6079 error=error, 6080 null=null, 6081 ) 6082 6083 def _parse_on_handling( 6084 self, on: str, *values: str 6085 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6086 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6087 for value in values: 6088 if self._match_text_seq(value, "ON", on): 6089 return f"{value} ON {on}" 6090 6091 index = self._index 6092 if self._match(TokenType.DEFAULT): 6093 default_value = self._parse_bitwise() 6094 if self._match_text_seq("ON", on): 6095 return default_value 6096 6097 self._retreat(index) 6098 6099 return None 6100 6101 @t.overload 6102 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6103 6104 @t.overload 6105 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6106 6107 def _parse_json_object(self, agg=False): 6108 star = self._parse_star() 6109 expressions = ( 6110 [star] 6111 if star 6112 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6113 ) 6114 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6115 6116 unique_keys = None 6117 if self._match_text_seq("WITH", "UNIQUE"): 6118 unique_keys = True 6119 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6120 unique_keys = False 6121 6122 self._match_text_seq("KEYS") 6123 6124 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6125 self._parse_type() 6126 ) 6127 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6128 6129 return self.expression( 6130 exp.JSONObjectAgg if agg else exp.JSONObject, 6131 expressions=expressions, 6132 null_handling=null_handling, 6133 unique_keys=unique_keys, 6134 return_type=return_type, 6135 encoding=encoding, 6136 ) 6137 6138 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6139 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6140 if not self._match_text_seq("NESTED"): 6141 this = self._parse_id_var() 6142 kind = self._parse_types(allow_identifiers=False) 6143 nested = None 6144 else: 6145 this = None 6146 kind = None 6147 nested = True 6148 6149 path = self._match_text_seq("PATH") and self._parse_string() 6150 nested_schema = nested and self._parse_json_schema() 6151 6152 return self.expression( 6153 exp.JSONColumnDef, 6154 this=this, 6155 kind=kind, 6156 path=path, 6157 nested_schema=nested_schema, 6158 ) 6159 6160 def _parse_json_schema(self) -> exp.JSONSchema: 6161 self._match_text_seq("COLUMNS") 6162 return self.expression( 6163 exp.JSONSchema, 6164 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6165 ) 6166 6167 def _parse_json_table(self) -> exp.JSONTable: 6168 this = self._parse_format_json(self._parse_bitwise()) 6169 path = self._match(TokenType.COMMA) and self._parse_string() 6170 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6171 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6172 schema = self._parse_json_schema() 6173 6174 return exp.JSONTable( 6175 this=this, 6176 schema=schema, 6177 path=path, 6178 error_handling=error_handling, 6179 empty_handling=empty_handling, 6180 ) 6181 6182 def _parse_match_against(self) -> exp.MatchAgainst: 6183 expressions = self._parse_csv(self._parse_column) 6184 6185 self._match_text_seq(")", "AGAINST", "(") 6186 6187 this = self._parse_string() 6188 6189 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6190 modifier = "IN NATURAL LANGUAGE MODE" 6191 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6192 modifier = f"{modifier} WITH QUERY EXPANSION" 6193 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6194 modifier = "IN BOOLEAN MODE" 6195 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6196 modifier = "WITH QUERY EXPANSION" 6197 else: 6198 modifier = None 6199 6200 return self.expression( 6201 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6202 ) 6203 6204 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6205 def _parse_open_json(self) -> exp.OpenJSON: 6206 this = self._parse_bitwise() 6207 path = self._match(TokenType.COMMA) and self._parse_string() 6208 6209 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6210 this = self._parse_field(any_token=True) 6211 kind = self._parse_types() 6212 path = self._parse_string() 6213 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6214 6215 return self.expression( 6216 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6217 ) 6218 6219 expressions = None 6220 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6221 self._match_l_paren() 6222 expressions = self._parse_csv(_parse_open_json_column_def) 6223 6224 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6225 6226 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6227 args = self._parse_csv(self._parse_bitwise) 6228 6229 if self._match(TokenType.IN): 6230 return self.expression( 6231 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6232 ) 6233 6234 if haystack_first: 6235 haystack = seq_get(args, 0) 6236 needle = seq_get(args, 1) 6237 else: 6238 needle = seq_get(args, 0) 6239 haystack = seq_get(args, 1) 6240 6241 return self.expression( 6242 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6243 ) 6244 6245 def _parse_predict(self) -> exp.Predict: 6246 self._match_text_seq("MODEL") 6247 this = self._parse_table() 6248 6249 self._match(TokenType.COMMA) 6250 self._match_text_seq("TABLE") 6251 6252 return self.expression( 6253 exp.Predict, 6254 this=this, 6255 expression=self._parse_table(), 6256 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6257 ) 6258 6259 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6260 args = self._parse_csv(self._parse_table) 6261 return exp.JoinHint(this=func_name.upper(), expressions=args) 6262 6263 def _parse_substring(self) -> exp.Substring: 6264 # Postgres supports the form: substring(string [from int] [for int]) 6265 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6266 6267 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6268 6269 if self._match(TokenType.FROM): 6270 args.append(self._parse_bitwise()) 6271 if self._match(TokenType.FOR): 6272 if len(args) == 1: 6273 args.append(exp.Literal.number(1)) 6274 args.append(self._parse_bitwise()) 6275 6276 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6277 6278 def _parse_trim(self) -> exp.Trim: 6279 # https://www.w3resource.com/sql/character-functions/trim.php 6280 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6281 6282 position = None 6283 collation = None 6284 expression = None 6285 6286 if self._match_texts(self.TRIM_TYPES): 6287 position = self._prev.text.upper() 6288 6289 this = self._parse_bitwise() 6290 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6291 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6292 expression = self._parse_bitwise() 6293 6294 if invert_order: 6295 this, expression = expression, this 6296 6297 if self._match(TokenType.COLLATE): 6298 collation = self._parse_bitwise() 6299 6300 return self.expression( 6301 exp.Trim, this=this, position=position, expression=expression, collation=collation 6302 ) 6303 6304 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6305 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6306 6307 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6308 return self._parse_window(self._parse_id_var(), alias=True) 6309 6310 def _parse_respect_or_ignore_nulls( 6311 self, this: t.Optional[exp.Expression] 6312 ) -> t.Optional[exp.Expression]: 6313 if self._match_text_seq("IGNORE", "NULLS"): 6314 return self.expression(exp.IgnoreNulls, this=this) 6315 if self._match_text_seq("RESPECT", "NULLS"): 6316 return self.expression(exp.RespectNulls, this=this) 6317 return this 6318 6319 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6320 if self._match(TokenType.HAVING): 6321 self._match_texts(("MAX", "MIN")) 6322 max = self._prev.text.upper() != "MIN" 6323 return self.expression( 6324 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6325 ) 6326 6327 return this 6328 6329 def _parse_window( 6330 self, this: t.Optional[exp.Expression], alias: bool = False 6331 ) -> t.Optional[exp.Expression]: 6332 func = this 6333 comments = func.comments if isinstance(func, exp.Expression) else None 6334 6335 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6336 self._match(TokenType.WHERE) 6337 this = self.expression( 6338 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6339 ) 6340 self._match_r_paren() 6341 6342 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6343 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6344 if self._match_text_seq("WITHIN", "GROUP"): 6345 order = self._parse_wrapped(self._parse_order) 6346 this = self.expression(exp.WithinGroup, this=this, expression=order) 6347 6348 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6349 # Some dialects choose to implement and some do not. 6350 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6351 6352 # There is some code above in _parse_lambda that handles 6353 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6354 6355 # The below changes handle 6356 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6357 6358 # Oracle allows both formats 6359 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6360 # and Snowflake chose to do the same for familiarity 6361 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6362 if isinstance(this, exp.AggFunc): 6363 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6364 6365 if ignore_respect and ignore_respect is not this: 6366 ignore_respect.replace(ignore_respect.this) 6367 this = self.expression(ignore_respect.__class__, this=this) 6368 6369 this = self._parse_respect_or_ignore_nulls(this) 6370 6371 # bigquery select from window x AS (partition by ...) 6372 if alias: 6373 over = None 6374 self._match(TokenType.ALIAS) 6375 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6376 return this 6377 else: 6378 over = self._prev.text.upper() 6379 6380 if comments and isinstance(func, exp.Expression): 6381 func.pop_comments() 6382 6383 if not self._match(TokenType.L_PAREN): 6384 return self.expression( 6385 exp.Window, 6386 comments=comments, 6387 this=this, 6388 alias=self._parse_id_var(False), 6389 over=over, 6390 ) 6391 6392 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6393 6394 first = self._match(TokenType.FIRST) 6395 if self._match_text_seq("LAST"): 6396 first = False 6397 6398 partition, order = self._parse_partition_and_order() 6399 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6400 6401 if kind: 6402 self._match(TokenType.BETWEEN) 6403 start = self._parse_window_spec() 6404 self._match(TokenType.AND) 6405 end = self._parse_window_spec() 6406 6407 spec = self.expression( 6408 exp.WindowSpec, 6409 kind=kind, 6410 start=start["value"], 6411 start_side=start["side"], 6412 end=end["value"], 6413 end_side=end["side"], 6414 ) 6415 else: 6416 spec = None 6417 6418 self._match_r_paren() 6419 6420 window = self.expression( 6421 exp.Window, 6422 comments=comments, 6423 this=this, 6424 partition_by=partition, 6425 order=order, 6426 spec=spec, 6427 alias=window_alias, 6428 over=over, 6429 first=first, 6430 ) 6431 6432 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6433 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6434 return self._parse_window(window, alias=alias) 6435 6436 return window 6437 6438 def _parse_partition_and_order( 6439 self, 6440 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6441 return self._parse_partition_by(), self._parse_order() 6442 6443 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6444 self._match(TokenType.BETWEEN) 6445 6446 return { 6447 "value": ( 6448 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6449 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6450 or self._parse_bitwise() 6451 ), 6452 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6453 } 6454 6455 def _parse_alias( 6456 self, this: t.Optional[exp.Expression], explicit: bool = False 6457 ) -> t.Optional[exp.Expression]: 6458 any_token = self._match(TokenType.ALIAS) 6459 comments = self._prev_comments or [] 6460 6461 if explicit and not any_token: 6462 return this 6463 6464 if self._match(TokenType.L_PAREN): 6465 aliases = self.expression( 6466 exp.Aliases, 6467 comments=comments, 6468 this=this, 6469 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6470 ) 6471 self._match_r_paren(aliases) 6472 return aliases 6473 6474 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6475 self.STRING_ALIASES and self._parse_string_as_identifier() 6476 ) 6477 6478 if alias: 6479 comments.extend(alias.pop_comments()) 6480 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6481 column = this.this 6482 6483 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6484 if not this.comments and column and column.comments: 6485 this.comments = column.pop_comments() 6486 6487 return this 6488 6489 def _parse_id_var( 6490 self, 6491 any_token: bool = True, 6492 tokens: t.Optional[t.Collection[TokenType]] = None, 6493 ) -> t.Optional[exp.Expression]: 6494 expression = self._parse_identifier() 6495 if not expression and ( 6496 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6497 ): 6498 quoted = self._prev.token_type == TokenType.STRING 6499 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6500 6501 return expression 6502 6503 def _parse_string(self) -> t.Optional[exp.Expression]: 6504 if self._match_set(self.STRING_PARSERS): 6505 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6506 return self._parse_placeholder() 6507 6508 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6509 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6510 6511 def _parse_number(self) -> t.Optional[exp.Expression]: 6512 if self._match_set(self.NUMERIC_PARSERS): 6513 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6514 return self._parse_placeholder() 6515 6516 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6517 if self._match(TokenType.IDENTIFIER): 6518 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6519 return self._parse_placeholder() 6520 6521 def _parse_var( 6522 self, 6523 any_token: bool = False, 6524 tokens: t.Optional[t.Collection[TokenType]] = None, 6525 upper: bool = False, 6526 ) -> t.Optional[exp.Expression]: 6527 if ( 6528 (any_token and self._advance_any()) 6529 or self._match(TokenType.VAR) 6530 or (self._match_set(tokens) if tokens else False) 6531 ): 6532 return self.expression( 6533 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6534 ) 6535 return self._parse_placeholder() 6536 6537 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6538 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6539 self._advance() 6540 return self._prev 6541 return None 6542 6543 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6544 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6545 6546 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6547 return self._parse_primary() or self._parse_var(any_token=True) 6548 6549 def _parse_null(self) -> t.Optional[exp.Expression]: 6550 if self._match_set(self.NULL_TOKENS): 6551 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6552 return self._parse_placeholder() 6553 6554 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6555 if self._match(TokenType.TRUE): 6556 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6557 if self._match(TokenType.FALSE): 6558 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6559 return self._parse_placeholder() 6560 6561 def _parse_star(self) -> t.Optional[exp.Expression]: 6562 if self._match(TokenType.STAR): 6563 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6564 return self._parse_placeholder() 6565 6566 def _parse_parameter(self) -> exp.Parameter: 6567 this = self._parse_identifier() or self._parse_primary_or_var() 6568 return self.expression(exp.Parameter, this=this) 6569 6570 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6571 if self._match_set(self.PLACEHOLDER_PARSERS): 6572 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6573 if placeholder: 6574 return placeholder 6575 self._advance(-1) 6576 return None 6577 6578 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6579 if not self._match_texts(keywords): 6580 return None 6581 if self._match(TokenType.L_PAREN, advance=False): 6582 return self._parse_wrapped_csv(self._parse_expression) 6583 6584 expression = self._parse_expression() 6585 return [expression] if expression else None 6586 6587 def _parse_csv( 6588 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6589 ) -> t.List[exp.Expression]: 6590 parse_result = parse_method() 6591 items = [parse_result] if parse_result is not None else [] 6592 6593 while self._match(sep): 6594 self._add_comments(parse_result) 6595 parse_result = parse_method() 6596 if parse_result is not None: 6597 items.append(parse_result) 6598 6599 return items 6600 6601 def _parse_tokens( 6602 self, parse_method: t.Callable, expressions: t.Dict 6603 ) -> t.Optional[exp.Expression]: 6604 this = parse_method() 6605 6606 while self._match_set(expressions): 6607 this = self.expression( 6608 expressions[self._prev.token_type], 6609 this=this, 6610 comments=self._prev_comments, 6611 expression=parse_method(), 6612 ) 6613 6614 return this 6615 6616 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6617 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6618 6619 def _parse_wrapped_csv( 6620 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6621 ) -> t.List[exp.Expression]: 6622 return self._parse_wrapped( 6623 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6624 ) 6625 6626 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6627 wrapped = self._match(TokenType.L_PAREN) 6628 if not wrapped and not optional: 6629 self.raise_error("Expecting (") 6630 parse_result = parse_method() 6631 if wrapped: 6632 self._match_r_paren() 6633 return parse_result 6634 6635 def _parse_expressions(self) -> t.List[exp.Expression]: 6636 return self._parse_csv(self._parse_expression) 6637 6638 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6639 return self._parse_select() or self._parse_set_operations( 6640 self._parse_expression() if alias else self._parse_assignment() 6641 ) 6642 6643 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6644 return self._parse_query_modifiers( 6645 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6646 ) 6647 6648 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6649 this = None 6650 if self._match_texts(self.TRANSACTION_KIND): 6651 this = self._prev.text 6652 6653 self._match_texts(("TRANSACTION", "WORK")) 6654 6655 modes = [] 6656 while True: 6657 mode = [] 6658 while self._match(TokenType.VAR): 6659 mode.append(self._prev.text) 6660 6661 if mode: 6662 modes.append(" ".join(mode)) 6663 if not self._match(TokenType.COMMA): 6664 break 6665 6666 return self.expression(exp.Transaction, this=this, modes=modes) 6667 6668 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6669 chain = None 6670 savepoint = None 6671 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6672 6673 self._match_texts(("TRANSACTION", "WORK")) 6674 6675 if self._match_text_seq("TO"): 6676 self._match_text_seq("SAVEPOINT") 6677 savepoint = self._parse_id_var() 6678 6679 if self._match(TokenType.AND): 6680 chain = not self._match_text_seq("NO") 6681 self._match_text_seq("CHAIN") 6682 6683 if is_rollback: 6684 return self.expression(exp.Rollback, savepoint=savepoint) 6685 6686 return self.expression(exp.Commit, chain=chain) 6687 6688 def _parse_refresh(self) -> exp.Refresh: 6689 self._match(TokenType.TABLE) 6690 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6691 6692 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6693 if not self._match_text_seq("ADD"): 6694 return None 6695 6696 self._match(TokenType.COLUMN) 6697 exists_column = self._parse_exists(not_=True) 6698 expression = self._parse_field_def() 6699 6700 if expression: 6701 expression.set("exists", exists_column) 6702 6703 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6704 if self._match_texts(("FIRST", "AFTER")): 6705 position = self._prev.text 6706 column_position = self.expression( 6707 exp.ColumnPosition, this=self._parse_column(), position=position 6708 ) 6709 expression.set("position", column_position) 6710 6711 return expression 6712 6713 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6714 drop = self._match(TokenType.DROP) and self._parse_drop() 6715 if drop and not isinstance(drop, exp.Command): 6716 drop.set("kind", drop.args.get("kind", "COLUMN")) 6717 return drop 6718 6719 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6720 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6721 return self.expression( 6722 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6723 ) 6724 6725 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6726 index = self._index - 1 6727 6728 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6729 return self._parse_csv( 6730 lambda: self.expression( 6731 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6732 ) 6733 ) 6734 6735 self._retreat(index) 6736 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6737 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6738 6739 if self._match_text_seq("ADD", "COLUMNS"): 6740 schema = self._parse_schema() 6741 if schema: 6742 return [schema] 6743 return [] 6744 6745 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6746 6747 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6748 if self._match_texts(self.ALTER_ALTER_PARSERS): 6749 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6750 6751 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6752 # keyword after ALTER we default to parsing this statement 6753 self._match(TokenType.COLUMN) 6754 column = self._parse_field(any_token=True) 6755 6756 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6757 return self.expression(exp.AlterColumn, this=column, drop=True) 6758 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6759 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6760 if self._match(TokenType.COMMENT): 6761 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6762 if self._match_text_seq("DROP", "NOT", "NULL"): 6763 return self.expression( 6764 exp.AlterColumn, 6765 this=column, 6766 drop=True, 6767 allow_null=True, 6768 ) 6769 if self._match_text_seq("SET", "NOT", "NULL"): 6770 return self.expression( 6771 exp.AlterColumn, 6772 this=column, 6773 allow_null=False, 6774 ) 6775 self._match_text_seq("SET", "DATA") 6776 self._match_text_seq("TYPE") 6777 return self.expression( 6778 exp.AlterColumn, 6779 this=column, 6780 dtype=self._parse_types(), 6781 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6782 using=self._match(TokenType.USING) and self._parse_assignment(), 6783 ) 6784 6785 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6786 if self._match_texts(("ALL", "EVEN", "AUTO")): 6787 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6788 6789 self._match_text_seq("KEY", "DISTKEY") 6790 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6791 6792 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6793 if compound: 6794 self._match_text_seq("SORTKEY") 6795 6796 if self._match(TokenType.L_PAREN, advance=False): 6797 return self.expression( 6798 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6799 ) 6800 6801 self._match_texts(("AUTO", "NONE")) 6802 return self.expression( 6803 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6804 ) 6805 6806 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6807 index = self._index - 1 6808 6809 partition_exists = self._parse_exists() 6810 if self._match(TokenType.PARTITION, advance=False): 6811 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6812 6813 self._retreat(index) 6814 return self._parse_csv(self._parse_drop_column) 6815 6816 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6817 if self._match(TokenType.COLUMN): 6818 exists = self._parse_exists() 6819 old_column = self._parse_column() 6820 to = self._match_text_seq("TO") 6821 new_column = self._parse_column() 6822 6823 if old_column is None or to is None or new_column is None: 6824 return None 6825 6826 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6827 6828 self._match_text_seq("TO") 6829 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6830 6831 def _parse_alter_table_set(self) -> exp.AlterSet: 6832 alter_set = self.expression(exp.AlterSet) 6833 6834 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6835 "TABLE", "PROPERTIES" 6836 ): 6837 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6838 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6839 alter_set.set("expressions", [self._parse_assignment()]) 6840 elif self._match_texts(("LOGGED", "UNLOGGED")): 6841 alter_set.set("option", exp.var(self._prev.text.upper())) 6842 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6843 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6844 elif self._match_text_seq("LOCATION"): 6845 alter_set.set("location", self._parse_field()) 6846 elif self._match_text_seq("ACCESS", "METHOD"): 6847 alter_set.set("access_method", self._parse_field()) 6848 elif self._match_text_seq("TABLESPACE"): 6849 alter_set.set("tablespace", self._parse_field()) 6850 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6851 alter_set.set("file_format", [self._parse_field()]) 6852 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6853 alter_set.set("file_format", self._parse_wrapped_options()) 6854 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6855 alter_set.set("copy_options", self._parse_wrapped_options()) 6856 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6857 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6858 else: 6859 if self._match_text_seq("SERDE"): 6860 alter_set.set("serde", self._parse_field()) 6861 6862 alter_set.set("expressions", [self._parse_properties()]) 6863 6864 return alter_set 6865 6866 def _parse_alter(self) -> exp.Alter | exp.Command: 6867 start = self._prev 6868 6869 alter_token = self._match_set(self.ALTERABLES) and self._prev 6870 if not alter_token: 6871 return self._parse_as_command(start) 6872 6873 exists = self._parse_exists() 6874 only = self._match_text_seq("ONLY") 6875 this = self._parse_table(schema=True) 6876 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6877 6878 if self._next: 6879 self._advance() 6880 6881 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6882 if parser: 6883 actions = ensure_list(parser(self)) 6884 not_valid = self._match_text_seq("NOT", "VALID") 6885 options = self._parse_csv(self._parse_property) 6886 6887 if not self._curr and actions: 6888 return self.expression( 6889 exp.Alter, 6890 this=this, 6891 kind=alter_token.text.upper(), 6892 exists=exists, 6893 actions=actions, 6894 only=only, 6895 options=options, 6896 cluster=cluster, 6897 not_valid=not_valid, 6898 ) 6899 6900 return self._parse_as_command(start) 6901 6902 def _parse_merge(self) -> exp.Merge: 6903 self._match(TokenType.INTO) 6904 target = self._parse_table() 6905 6906 if target and self._match(TokenType.ALIAS, advance=False): 6907 target.set("alias", self._parse_table_alias()) 6908 6909 self._match(TokenType.USING) 6910 using = self._parse_table() 6911 6912 self._match(TokenType.ON) 6913 on = self._parse_assignment() 6914 6915 return self.expression( 6916 exp.Merge, 6917 this=target, 6918 using=using, 6919 on=on, 6920 expressions=self._parse_when_matched(), 6921 returning=self._parse_returning(), 6922 ) 6923 6924 def _parse_when_matched(self) -> t.List[exp.When]: 6925 whens = [] 6926 6927 while self._match(TokenType.WHEN): 6928 matched = not self._match(TokenType.NOT) 6929 self._match_text_seq("MATCHED") 6930 source = ( 6931 False 6932 if self._match_text_seq("BY", "TARGET") 6933 else self._match_text_seq("BY", "SOURCE") 6934 ) 6935 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6936 6937 self._match(TokenType.THEN) 6938 6939 if self._match(TokenType.INSERT): 6940 this = self._parse_star() 6941 if this: 6942 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6943 else: 6944 then = self.expression( 6945 exp.Insert, 6946 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6947 expression=self._match_text_seq("VALUES") and self._parse_value(), 6948 ) 6949 elif self._match(TokenType.UPDATE): 6950 expressions = self._parse_star() 6951 if expressions: 6952 then = self.expression(exp.Update, expressions=expressions) 6953 else: 6954 then = self.expression( 6955 exp.Update, 6956 expressions=self._match(TokenType.SET) 6957 and self._parse_csv(self._parse_equality), 6958 ) 6959 elif self._match(TokenType.DELETE): 6960 then = self.expression(exp.Var, this=self._prev.text) 6961 else: 6962 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6963 6964 whens.append( 6965 self.expression( 6966 exp.When, 6967 matched=matched, 6968 source=source, 6969 condition=condition, 6970 then=then, 6971 ) 6972 ) 6973 return whens 6974 6975 def _parse_show(self) -> t.Optional[exp.Expression]: 6976 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6977 if parser: 6978 return parser(self) 6979 return self._parse_as_command(self._prev) 6980 6981 def _parse_set_item_assignment( 6982 self, kind: t.Optional[str] = None 6983 ) -> t.Optional[exp.Expression]: 6984 index = self._index 6985 6986 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6987 return self._parse_set_transaction(global_=kind == "GLOBAL") 6988 6989 left = self._parse_primary() or self._parse_column() 6990 assignment_delimiter = self._match_texts(("=", "TO")) 6991 6992 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6993 self._retreat(index) 6994 return None 6995 6996 right = self._parse_statement() or self._parse_id_var() 6997 if isinstance(right, (exp.Column, exp.Identifier)): 6998 right = exp.var(right.name) 6999 7000 this = self.expression(exp.EQ, this=left, expression=right) 7001 return self.expression(exp.SetItem, this=this, kind=kind) 7002 7003 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7004 self._match_text_seq("TRANSACTION") 7005 characteristics = self._parse_csv( 7006 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7007 ) 7008 return self.expression( 7009 exp.SetItem, 7010 expressions=characteristics, 7011 kind="TRANSACTION", 7012 **{"global": global_}, # type: ignore 7013 ) 7014 7015 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7016 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7017 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7018 7019 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7020 index = self._index 7021 set_ = self.expression( 7022 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7023 ) 7024 7025 if self._curr: 7026 self._retreat(index) 7027 return self._parse_as_command(self._prev) 7028 7029 return set_ 7030 7031 def _parse_var_from_options( 7032 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7033 ) -> t.Optional[exp.Var]: 7034 start = self._curr 7035 if not start: 7036 return None 7037 7038 option = start.text.upper() 7039 continuations = options.get(option) 7040 7041 index = self._index 7042 self._advance() 7043 for keywords in continuations or []: 7044 if isinstance(keywords, str): 7045 keywords = (keywords,) 7046 7047 if self._match_text_seq(*keywords): 7048 option = f"{option} {' '.join(keywords)}" 7049 break 7050 else: 7051 if continuations or continuations is None: 7052 if raise_unmatched: 7053 self.raise_error(f"Unknown option {option}") 7054 7055 self._retreat(index) 7056 return None 7057 7058 return exp.var(option) 7059 7060 def _parse_as_command(self, start: Token) -> exp.Command: 7061 while self._curr: 7062 self._advance() 7063 text = self._find_sql(start, self._prev) 7064 size = len(start.text) 7065 self._warn_unsupported() 7066 return exp.Command(this=text[:size], expression=text[size:]) 7067 7068 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7069 settings = [] 7070 7071 self._match_l_paren() 7072 kind = self._parse_id_var() 7073 7074 if self._match(TokenType.L_PAREN): 7075 while True: 7076 key = self._parse_id_var() 7077 value = self._parse_primary() 7078 7079 if not key and value is None: 7080 break 7081 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7082 self._match(TokenType.R_PAREN) 7083 7084 self._match_r_paren() 7085 7086 return self.expression( 7087 exp.DictProperty, 7088 this=this, 7089 kind=kind.this if kind else None, 7090 settings=settings, 7091 ) 7092 7093 def _parse_dict_range(self, this: str) -> exp.DictRange: 7094 self._match_l_paren() 7095 has_min = self._match_text_seq("MIN") 7096 if has_min: 7097 min = self._parse_var() or self._parse_primary() 7098 self._match_text_seq("MAX") 7099 max = self._parse_var() or self._parse_primary() 7100 else: 7101 max = self._parse_var() or self._parse_primary() 7102 min = exp.Literal.number(0) 7103 self._match_r_paren() 7104 return self.expression(exp.DictRange, this=this, min=min, max=max) 7105 7106 def _parse_comprehension( 7107 self, this: t.Optional[exp.Expression] 7108 ) -> t.Optional[exp.Comprehension]: 7109 index = self._index 7110 expression = self._parse_column() 7111 if not self._match(TokenType.IN): 7112 self._retreat(index - 1) 7113 return None 7114 iterator = self._parse_column() 7115 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7116 return self.expression( 7117 exp.Comprehension, 7118 this=this, 7119 expression=expression, 7120 iterator=iterator, 7121 condition=condition, 7122 ) 7123 7124 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7125 if self._match(TokenType.HEREDOC_STRING): 7126 return self.expression(exp.Heredoc, this=self._prev.text) 7127 7128 if not self._match_text_seq("$"): 7129 return None 7130 7131 tags = ["$"] 7132 tag_text = None 7133 7134 if self._is_connected(): 7135 self._advance() 7136 tags.append(self._prev.text.upper()) 7137 else: 7138 self.raise_error("No closing $ found") 7139 7140 if tags[-1] != "$": 7141 if self._is_connected() and self._match_text_seq("$"): 7142 tag_text = tags[-1] 7143 tags.append("$") 7144 else: 7145 self.raise_error("No closing $ found") 7146 7147 heredoc_start = self._curr 7148 7149 while self._curr: 7150 if self._match_text_seq(*tags, advance=False): 7151 this = self._find_sql(heredoc_start, self._prev) 7152 self._advance(len(tags)) 7153 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7154 7155 self._advance() 7156 7157 self.raise_error(f"No closing {''.join(tags)} found") 7158 return None 7159 7160 def _find_parser( 7161 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7162 ) -> t.Optional[t.Callable]: 7163 if not self._curr: 7164 return None 7165 7166 index = self._index 7167 this = [] 7168 while True: 7169 # The current token might be multiple words 7170 curr = self._curr.text.upper() 7171 key = curr.split(" ") 7172 this.append(curr) 7173 7174 self._advance() 7175 result, trie = in_trie(trie, key) 7176 if result == TrieResult.FAILED: 7177 break 7178 7179 if result == TrieResult.EXISTS: 7180 subparser = parsers[" ".join(this)] 7181 return subparser 7182 7183 self._retreat(index) 7184 return None 7185 7186 def _match(self, token_type, advance=True, expression=None): 7187 if not self._curr: 7188 return None 7189 7190 if self._curr.token_type == token_type: 7191 if advance: 7192 self._advance() 7193 self._add_comments(expression) 7194 return True 7195 7196 return None 7197 7198 def _match_set(self, types, advance=True): 7199 if not self._curr: 7200 return None 7201 7202 if self._curr.token_type in types: 7203 if advance: 7204 self._advance() 7205 return True 7206 7207 return None 7208 7209 def _match_pair(self, token_type_a, token_type_b, advance=True): 7210 if not self._curr or not self._next: 7211 return None 7212 7213 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7214 if advance: 7215 self._advance(2) 7216 return True 7217 7218 return None 7219 7220 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7221 if not self._match(TokenType.L_PAREN, expression=expression): 7222 self.raise_error("Expecting (") 7223 7224 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7225 if not self._match(TokenType.R_PAREN, expression=expression): 7226 self.raise_error("Expecting )") 7227 7228 def _match_texts(self, texts, advance=True): 7229 if ( 7230 self._curr 7231 and self._curr.token_type != TokenType.STRING 7232 and self._curr.text.upper() in texts 7233 ): 7234 if advance: 7235 self._advance() 7236 return True 7237 return None 7238 7239 def _match_text_seq(self, *texts, advance=True): 7240 index = self._index 7241 for text in texts: 7242 if ( 7243 self._curr 7244 and self._curr.token_type != TokenType.STRING 7245 and self._curr.text.upper() == text 7246 ): 7247 self._advance() 7248 else: 7249 self._retreat(index) 7250 return None 7251 7252 if not advance: 7253 self._retreat(index) 7254 7255 return True 7256 7257 def _replace_lambda( 7258 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7259 ) -> t.Optional[exp.Expression]: 7260 if not node: 7261 return node 7262 7263 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7264 7265 for column in node.find_all(exp.Column): 7266 typ = lambda_types.get(column.parts[0].name) 7267 if typ is not None: 7268 dot_or_id = column.to_dot() if column.table else column.this 7269 7270 if typ: 7271 dot_or_id = self.expression( 7272 exp.Cast, 7273 this=dot_or_id, 7274 to=typ, 7275 ) 7276 7277 parent = column.parent 7278 7279 while isinstance(parent, exp.Dot): 7280 if not isinstance(parent.parent, exp.Dot): 7281 parent.replace(dot_or_id) 7282 break 7283 parent = parent.parent 7284 else: 7285 if column is node: 7286 node = dot_or_id 7287 else: 7288 column.replace(dot_or_id) 7289 return node 7290 7291 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7292 start = self._prev 7293 7294 # Not to be confused with TRUNCATE(number, decimals) function call 7295 if self._match(TokenType.L_PAREN): 7296 self._retreat(self._index - 2) 7297 return self._parse_function() 7298 7299 # Clickhouse supports TRUNCATE DATABASE as well 7300 is_database = self._match(TokenType.DATABASE) 7301 7302 self._match(TokenType.TABLE) 7303 7304 exists = self._parse_exists(not_=False) 7305 7306 expressions = self._parse_csv( 7307 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7308 ) 7309 7310 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7311 7312 if self._match_text_seq("RESTART", "IDENTITY"): 7313 identity = "RESTART" 7314 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7315 identity = "CONTINUE" 7316 else: 7317 identity = None 7318 7319 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7320 option = self._prev.text 7321 else: 7322 option = None 7323 7324 partition = self._parse_partition() 7325 7326 # Fallback case 7327 if self._curr: 7328 return self._parse_as_command(start) 7329 7330 return self.expression( 7331 exp.TruncateTable, 7332 expressions=expressions, 7333 is_database=is_database, 7334 exists=exists, 7335 cluster=cluster, 7336 identity=identity, 7337 option=option, 7338 partition=partition, 7339 ) 7340 7341 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7342 this = self._parse_ordered(self._parse_opclass) 7343 7344 if not self._match(TokenType.WITH): 7345 return this 7346 7347 op = self._parse_var(any_token=True) 7348 7349 return self.expression(exp.WithOperator, this=this, op=op) 7350 7351 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7352 self._match(TokenType.EQ) 7353 self._match(TokenType.L_PAREN) 7354 7355 opts: t.List[t.Optional[exp.Expression]] = [] 7356 while self._curr and not self._match(TokenType.R_PAREN): 7357 if self._match_text_seq("FORMAT_NAME", "="): 7358 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7359 # so we parse it separately to use _parse_field() 7360 prop = self.expression( 7361 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7362 ) 7363 opts.append(prop) 7364 else: 7365 opts.append(self._parse_property()) 7366 7367 self._match(TokenType.COMMA) 7368 7369 return opts 7370 7371 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7372 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7373 7374 options = [] 7375 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7376 option = self._parse_var(any_token=True) 7377 prev = self._prev.text.upper() 7378 7379 # Different dialects might separate options and values by white space, "=" and "AS" 7380 self._match(TokenType.EQ) 7381 self._match(TokenType.ALIAS) 7382 7383 param = self.expression(exp.CopyParameter, this=option) 7384 7385 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7386 TokenType.L_PAREN, advance=False 7387 ): 7388 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7389 param.set("expressions", self._parse_wrapped_options()) 7390 elif prev == "FILE_FORMAT": 7391 # T-SQL's external file format case 7392 param.set("expression", self._parse_field()) 7393 else: 7394 param.set("expression", self._parse_unquoted_field()) 7395 7396 options.append(param) 7397 self._match(sep) 7398 7399 return options 7400 7401 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7402 expr = self.expression(exp.Credentials) 7403 7404 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7405 expr.set("storage", self._parse_field()) 7406 if self._match_text_seq("CREDENTIALS"): 7407 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7408 creds = ( 7409 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7410 ) 7411 expr.set("credentials", creds) 7412 if self._match_text_seq("ENCRYPTION"): 7413 expr.set("encryption", self._parse_wrapped_options()) 7414 if self._match_text_seq("IAM_ROLE"): 7415 expr.set("iam_role", self._parse_field()) 7416 if self._match_text_seq("REGION"): 7417 expr.set("region", self._parse_field()) 7418 7419 return expr 7420 7421 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7422 return self._parse_field() 7423 7424 def _parse_copy(self) -> exp.Copy | exp.Command: 7425 start = self._prev 7426 7427 self._match(TokenType.INTO) 7428 7429 this = ( 7430 self._parse_select(nested=True, parse_subquery_alias=False) 7431 if self._match(TokenType.L_PAREN, advance=False) 7432 else self._parse_table(schema=True) 7433 ) 7434 7435 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7436 7437 files = self._parse_csv(self._parse_file_location) 7438 credentials = self._parse_credentials() 7439 7440 self._match_text_seq("WITH") 7441 7442 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7443 7444 # Fallback case 7445 if self._curr: 7446 return self._parse_as_command(start) 7447 7448 return self.expression( 7449 exp.Copy, 7450 this=this, 7451 kind=kind, 7452 credentials=credentials, 7453 files=files, 7454 params=params, 7455 ) 7456 7457 def _parse_normalize(self) -> exp.Normalize: 7458 return self.expression( 7459 exp.Normalize, 7460 this=self._parse_bitwise(), 7461 form=self._match(TokenType.COMMA) and self._parse_var(), 7462 ) 7463 7464 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7465 if self._match_text_seq("COLUMNS", "(", advance=False): 7466 this = self._parse_function() 7467 if isinstance(this, exp.Columns): 7468 this.set("unpack", True) 7469 return this 7470 7471 return self.expression( 7472 exp.Star, 7473 **{ # type: ignore 7474 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7475 "replace": self._parse_star_op("REPLACE"), 7476 "rename": self._parse_star_op("RENAME"), 7477 }, 7478 ) 7479 7480 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7481 privilege_parts = [] 7482 7483 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7484 # (end of privilege list) or L_PAREN (start of column list) are met 7485 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7486 privilege_parts.append(self._curr.text.upper()) 7487 self._advance() 7488 7489 this = exp.var(" ".join(privilege_parts)) 7490 expressions = ( 7491 self._parse_wrapped_csv(self._parse_column) 7492 if self._match(TokenType.L_PAREN, advance=False) 7493 else None 7494 ) 7495 7496 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7497 7498 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7499 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7500 principal = self._parse_id_var() 7501 7502 if not principal: 7503 return None 7504 7505 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7506 7507 def _parse_grant(self) -> exp.Grant | exp.Command: 7508 start = self._prev 7509 7510 privileges = self._parse_csv(self._parse_grant_privilege) 7511 7512 self._match(TokenType.ON) 7513 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7514 7515 # Attempt to parse the securable e.g. MySQL allows names 7516 # such as "foo.*", "*.*" which are not easily parseable yet 7517 securable = self._try_parse(self._parse_table_parts) 7518 7519 if not securable or not self._match_text_seq("TO"): 7520 return self._parse_as_command(start) 7521 7522 principals = self._parse_csv(self._parse_grant_principal) 7523 7524 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7525 7526 if self._curr: 7527 return self._parse_as_command(start) 7528 7529 return self.expression( 7530 exp.Grant, 7531 privileges=privileges, 7532 kind=kind, 7533 securable=securable, 7534 principals=principals, 7535 grant_option=grant_option, 7536 ) 7537 7538 def _parse_overlay(self) -> exp.Overlay: 7539 return self.expression( 7540 exp.Overlay, 7541 **{ # type: ignore 7542 "this": self._parse_bitwise(), 7543 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7544 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7545 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7546 }, 7547 )
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
121def build_array_constructor( 122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 123) -> exp.Expression: 124 array_exp = exp_class(expressions=args) 125 126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 128 129 return array_exp
132def build_convert_timezone( 133 args: t.List, default_source_tz: t.Optional[str] = None 134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 135 if len(args) == 2: 136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 137 return exp.ConvertTimezone( 138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 139 ) 140 141 return exp.ConvertTimezone.from_arg_list(args)
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.DECIMAL256, 356 TokenType.UDECIMAL, 357 TokenType.BIGDECIMAL, 358 TokenType.UUID, 359 TokenType.GEOGRAPHY, 360 TokenType.GEOMETRY, 361 TokenType.POINT, 362 TokenType.RING, 363 TokenType.LINESTRING, 364 TokenType.MULTILINESTRING, 365 TokenType.POLYGON, 366 TokenType.MULTIPOLYGON, 367 TokenType.HLLSKETCH, 368 TokenType.HSTORE, 369 TokenType.PSEUDO_TYPE, 370 TokenType.SUPER, 371 TokenType.SERIAL, 372 TokenType.SMALLSERIAL, 373 TokenType.BIGSERIAL, 374 TokenType.XML, 375 TokenType.YEAR, 376 TokenType.UNIQUEIDENTIFIER, 377 TokenType.USERDEFINED, 378 TokenType.MONEY, 379 TokenType.SMALLMONEY, 380 TokenType.ROWVERSION, 381 TokenType.IMAGE, 382 TokenType.VARIANT, 383 TokenType.VECTOR, 384 TokenType.OBJECT, 385 TokenType.OBJECT_IDENTIFIER, 386 TokenType.INET, 387 TokenType.IPADDRESS, 388 TokenType.IPPREFIX, 389 TokenType.IPV4, 390 TokenType.IPV6, 391 TokenType.UNKNOWN, 392 TokenType.NULL, 393 TokenType.NAME, 394 TokenType.TDIGEST, 395 *ENUM_TYPE_TOKENS, 396 *NESTED_TYPE_TOKENS, 397 *AGGREGATE_TYPE_TOKENS, 398 } 399 400 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 401 TokenType.BIGINT: TokenType.UBIGINT, 402 TokenType.INT: TokenType.UINT, 403 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 404 TokenType.SMALLINT: TokenType.USMALLINT, 405 TokenType.TINYINT: TokenType.UTINYINT, 406 TokenType.DECIMAL: TokenType.UDECIMAL, 407 } 408 409 SUBQUERY_PREDICATES = { 410 TokenType.ANY: exp.Any, 411 TokenType.ALL: exp.All, 412 TokenType.EXISTS: exp.Exists, 413 TokenType.SOME: exp.Any, 414 } 415 416 RESERVED_TOKENS = { 417 *Tokenizer.SINGLE_TOKENS.values(), 418 TokenType.SELECT, 419 } - {TokenType.IDENTIFIER} 420 421 DB_CREATABLES = { 422 TokenType.DATABASE, 423 TokenType.DICTIONARY, 424 TokenType.MODEL, 425 TokenType.SCHEMA, 426 TokenType.SEQUENCE, 427 TokenType.STORAGE_INTEGRATION, 428 TokenType.TABLE, 429 TokenType.TAG, 430 TokenType.VIEW, 431 TokenType.WAREHOUSE, 432 TokenType.STREAMLIT, 433 } 434 435 CREATABLES = { 436 TokenType.COLUMN, 437 TokenType.CONSTRAINT, 438 TokenType.FOREIGN_KEY, 439 TokenType.FUNCTION, 440 TokenType.INDEX, 441 TokenType.PROCEDURE, 442 *DB_CREATABLES, 443 } 444 445 ALTERABLES = { 446 TokenType.INDEX, 447 TokenType.TABLE, 448 TokenType.VIEW, 449 } 450 451 # Tokens that can represent identifiers 452 ID_VAR_TOKENS = { 453 TokenType.ALL, 454 TokenType.VAR, 455 TokenType.ANTI, 456 TokenType.APPLY, 457 TokenType.ASC, 458 TokenType.ASOF, 459 TokenType.AUTO_INCREMENT, 460 TokenType.BEGIN, 461 TokenType.BPCHAR, 462 TokenType.CACHE, 463 TokenType.CASE, 464 TokenType.COLLATE, 465 TokenType.COMMAND, 466 TokenType.COMMENT, 467 TokenType.COMMIT, 468 TokenType.CONSTRAINT, 469 TokenType.COPY, 470 TokenType.CUBE, 471 TokenType.DEFAULT, 472 TokenType.DELETE, 473 TokenType.DESC, 474 TokenType.DESCRIBE, 475 TokenType.DICTIONARY, 476 TokenType.DIV, 477 TokenType.END, 478 TokenType.EXECUTE, 479 TokenType.ESCAPE, 480 TokenType.FALSE, 481 TokenType.FIRST, 482 TokenType.FILTER, 483 TokenType.FINAL, 484 TokenType.FORMAT, 485 TokenType.FULL, 486 TokenType.IDENTIFIER, 487 TokenType.IS, 488 TokenType.ISNULL, 489 TokenType.INTERVAL, 490 TokenType.KEEP, 491 TokenType.KILL, 492 TokenType.LEFT, 493 TokenType.LOAD, 494 TokenType.MERGE, 495 TokenType.NATURAL, 496 TokenType.NEXT, 497 TokenType.OFFSET, 498 TokenType.OPERATOR, 499 TokenType.ORDINALITY, 500 TokenType.OVERLAPS, 501 TokenType.OVERWRITE, 502 TokenType.PARTITION, 503 TokenType.PERCENT, 504 TokenType.PIVOT, 505 TokenType.PRAGMA, 506 TokenType.RANGE, 507 TokenType.RECURSIVE, 508 TokenType.REFERENCES, 509 TokenType.REFRESH, 510 TokenType.RENAME, 511 TokenType.REPLACE, 512 TokenType.RIGHT, 513 TokenType.ROLLUP, 514 TokenType.ROW, 515 TokenType.ROWS, 516 TokenType.SEMI, 517 TokenType.SET, 518 TokenType.SETTINGS, 519 TokenType.SHOW, 520 TokenType.TEMPORARY, 521 TokenType.TOP, 522 TokenType.TRUE, 523 TokenType.TRUNCATE, 524 TokenType.UNIQUE, 525 TokenType.UNNEST, 526 TokenType.UNPIVOT, 527 TokenType.UPDATE, 528 TokenType.USE, 529 TokenType.VOLATILE, 530 TokenType.WINDOW, 531 *CREATABLES, 532 *SUBQUERY_PREDICATES, 533 *TYPE_TOKENS, 534 *NO_PAREN_FUNCTIONS, 535 } 536 ID_VAR_TOKENS.remove(TokenType.UNION) 537 538 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 539 540 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 541 TokenType.ANTI, 542 TokenType.APPLY, 543 TokenType.ASOF, 544 TokenType.FULL, 545 TokenType.LEFT, 546 TokenType.LOCK, 547 TokenType.NATURAL, 548 TokenType.OFFSET, 549 TokenType.RIGHT, 550 TokenType.SEMI, 551 TokenType.WINDOW, 552 } 553 554 ALIAS_TOKENS = ID_VAR_TOKENS 555 556 ARRAY_CONSTRUCTORS = { 557 "ARRAY": exp.Array, 558 "LIST": exp.List, 559 } 560 561 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 562 563 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 564 565 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 566 567 FUNC_TOKENS = { 568 TokenType.COLLATE, 569 TokenType.COMMAND, 570 TokenType.CURRENT_DATE, 571 TokenType.CURRENT_DATETIME, 572 TokenType.CURRENT_TIMESTAMP, 573 TokenType.CURRENT_TIME, 574 TokenType.CURRENT_USER, 575 TokenType.FILTER, 576 TokenType.FIRST, 577 TokenType.FORMAT, 578 TokenType.GLOB, 579 TokenType.IDENTIFIER, 580 TokenType.INDEX, 581 TokenType.ISNULL, 582 TokenType.ILIKE, 583 TokenType.INSERT, 584 TokenType.LIKE, 585 TokenType.MERGE, 586 TokenType.OFFSET, 587 TokenType.PRIMARY_KEY, 588 TokenType.RANGE, 589 TokenType.REPLACE, 590 TokenType.RLIKE, 591 TokenType.ROW, 592 TokenType.UNNEST, 593 TokenType.VAR, 594 TokenType.LEFT, 595 TokenType.RIGHT, 596 TokenType.SEQUENCE, 597 TokenType.DATE, 598 TokenType.DATETIME, 599 TokenType.TABLE, 600 TokenType.TIMESTAMP, 601 TokenType.TIMESTAMPTZ, 602 TokenType.TRUNCATE, 603 TokenType.WINDOW, 604 TokenType.XOR, 605 *TYPE_TOKENS, 606 *SUBQUERY_PREDICATES, 607 } 608 609 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 610 TokenType.AND: exp.And, 611 } 612 613 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 614 TokenType.COLON_EQ: exp.PropertyEQ, 615 } 616 617 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 618 TokenType.OR: exp.Or, 619 } 620 621 EQUALITY = { 622 TokenType.EQ: exp.EQ, 623 TokenType.NEQ: exp.NEQ, 624 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 625 } 626 627 COMPARISON = { 628 TokenType.GT: exp.GT, 629 TokenType.GTE: exp.GTE, 630 TokenType.LT: exp.LT, 631 TokenType.LTE: exp.LTE, 632 } 633 634 BITWISE = { 635 TokenType.AMP: exp.BitwiseAnd, 636 TokenType.CARET: exp.BitwiseXor, 637 TokenType.PIPE: exp.BitwiseOr, 638 } 639 640 TERM = { 641 TokenType.DASH: exp.Sub, 642 TokenType.PLUS: exp.Add, 643 TokenType.MOD: exp.Mod, 644 TokenType.COLLATE: exp.Collate, 645 } 646 647 FACTOR = { 648 TokenType.DIV: exp.IntDiv, 649 TokenType.LR_ARROW: exp.Distance, 650 TokenType.SLASH: exp.Div, 651 TokenType.STAR: exp.Mul, 652 } 653 654 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 655 656 TIMES = { 657 TokenType.TIME, 658 TokenType.TIMETZ, 659 } 660 661 TIMESTAMPS = { 662 TokenType.TIMESTAMP, 663 TokenType.TIMESTAMPTZ, 664 TokenType.TIMESTAMPLTZ, 665 *TIMES, 666 } 667 668 SET_OPERATIONS = { 669 TokenType.UNION, 670 TokenType.INTERSECT, 671 TokenType.EXCEPT, 672 } 673 674 JOIN_METHODS = { 675 TokenType.ASOF, 676 TokenType.NATURAL, 677 TokenType.POSITIONAL, 678 } 679 680 JOIN_SIDES = { 681 TokenType.LEFT, 682 TokenType.RIGHT, 683 TokenType.FULL, 684 } 685 686 JOIN_KINDS = { 687 TokenType.ANTI, 688 TokenType.CROSS, 689 TokenType.INNER, 690 TokenType.OUTER, 691 TokenType.SEMI, 692 TokenType.STRAIGHT_JOIN, 693 } 694 695 JOIN_HINTS: t.Set[str] = set() 696 697 LAMBDAS = { 698 TokenType.ARROW: lambda self, expressions: self.expression( 699 exp.Lambda, 700 this=self._replace_lambda( 701 self._parse_assignment(), 702 expressions, 703 ), 704 expressions=expressions, 705 ), 706 TokenType.FARROW: lambda self, expressions: self.expression( 707 exp.Kwarg, 708 this=exp.var(expressions[0].name), 709 expression=self._parse_assignment(), 710 ), 711 } 712 713 COLUMN_OPERATORS = { 714 TokenType.DOT: None, 715 TokenType.DCOLON: lambda self, this, to: self.expression( 716 exp.Cast if self.STRICT_CAST else exp.TryCast, 717 this=this, 718 to=to, 719 ), 720 TokenType.ARROW: lambda self, this, path: self.expression( 721 exp.JSONExtract, 722 this=this, 723 expression=self.dialect.to_json_path(path), 724 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 725 ), 726 TokenType.DARROW: lambda self, this, path: self.expression( 727 exp.JSONExtractScalar, 728 this=this, 729 expression=self.dialect.to_json_path(path), 730 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 731 ), 732 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 733 exp.JSONBExtract, 734 this=this, 735 expression=path, 736 ), 737 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 738 exp.JSONBExtractScalar, 739 this=this, 740 expression=path, 741 ), 742 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 743 exp.JSONBContains, 744 this=this, 745 expression=key, 746 ), 747 } 748 749 EXPRESSION_PARSERS = { 750 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 751 exp.Column: lambda self: self._parse_column(), 752 exp.Condition: lambda self: self._parse_assignment(), 753 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 754 exp.Expression: lambda self: self._parse_expression(), 755 exp.From: lambda self: self._parse_from(joins=True), 756 exp.Group: lambda self: self._parse_group(), 757 exp.Having: lambda self: self._parse_having(), 758 exp.Identifier: lambda self: self._parse_id_var(), 759 exp.Join: lambda self: self._parse_join(), 760 exp.Lambda: lambda self: self._parse_lambda(), 761 exp.Lateral: lambda self: self._parse_lateral(), 762 exp.Limit: lambda self: self._parse_limit(), 763 exp.Offset: lambda self: self._parse_offset(), 764 exp.Order: lambda self: self._parse_order(), 765 exp.Ordered: lambda self: self._parse_ordered(), 766 exp.Properties: lambda self: self._parse_properties(), 767 exp.Qualify: lambda self: self._parse_qualify(), 768 exp.Returning: lambda self: self._parse_returning(), 769 exp.Select: lambda self: self._parse_select(), 770 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 771 exp.Table: lambda self: self._parse_table_parts(), 772 exp.TableAlias: lambda self: self._parse_table_alias(), 773 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 774 exp.Where: lambda self: self._parse_where(), 775 exp.Window: lambda self: self._parse_named_window(), 776 exp.With: lambda self: self._parse_with(), 777 "JOIN_TYPE": lambda self: self._parse_join_parts(), 778 } 779 780 STATEMENT_PARSERS = { 781 TokenType.ALTER: lambda self: self._parse_alter(), 782 TokenType.BEGIN: lambda self: self._parse_transaction(), 783 TokenType.CACHE: lambda self: self._parse_cache(), 784 TokenType.COMMENT: lambda self: self._parse_comment(), 785 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 786 TokenType.COPY: lambda self: self._parse_copy(), 787 TokenType.CREATE: lambda self: self._parse_create(), 788 TokenType.DELETE: lambda self: self._parse_delete(), 789 TokenType.DESC: lambda self: self._parse_describe(), 790 TokenType.DESCRIBE: lambda self: self._parse_describe(), 791 TokenType.DROP: lambda self: self._parse_drop(), 792 TokenType.GRANT: lambda self: self._parse_grant(), 793 TokenType.INSERT: lambda self: self._parse_insert(), 794 TokenType.KILL: lambda self: self._parse_kill(), 795 TokenType.LOAD: lambda self: self._parse_load(), 796 TokenType.MERGE: lambda self: self._parse_merge(), 797 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 798 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 799 TokenType.REFRESH: lambda self: self._parse_refresh(), 800 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 801 TokenType.SET: lambda self: self._parse_set(), 802 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 803 TokenType.UNCACHE: lambda self: self._parse_uncache(), 804 TokenType.UPDATE: lambda self: self._parse_update(), 805 TokenType.USE: lambda self: self.expression( 806 exp.Use, 807 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 808 this=self._parse_table(schema=False), 809 ), 810 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 811 } 812 813 UNARY_PARSERS = { 814 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 815 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 816 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 817 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 818 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 819 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 820 } 821 822 STRING_PARSERS = { 823 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 824 exp.RawString, this=token.text 825 ), 826 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 827 exp.National, this=token.text 828 ), 829 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 830 TokenType.STRING: lambda self, token: self.expression( 831 exp.Literal, this=token.text, is_string=True 832 ), 833 TokenType.UNICODE_STRING: lambda self, token: self.expression( 834 exp.UnicodeString, 835 this=token.text, 836 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 837 ), 838 } 839 840 NUMERIC_PARSERS = { 841 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 842 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 843 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 844 TokenType.NUMBER: lambda self, token: self.expression( 845 exp.Literal, this=token.text, is_string=False 846 ), 847 } 848 849 PRIMARY_PARSERS = { 850 **STRING_PARSERS, 851 **NUMERIC_PARSERS, 852 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 853 TokenType.NULL: lambda self, _: self.expression(exp.Null), 854 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 855 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 856 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 857 TokenType.STAR: lambda self, _: self._parse_star_ops(), 858 } 859 860 PLACEHOLDER_PARSERS = { 861 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 862 TokenType.PARAMETER: lambda self: self._parse_parameter(), 863 TokenType.COLON: lambda self: ( 864 self.expression(exp.Placeholder, this=self._prev.text) 865 if self._match_set(self.ID_VAR_TOKENS) 866 else None 867 ), 868 } 869 870 RANGE_PARSERS = { 871 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 872 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 873 TokenType.GLOB: binary_range_parser(exp.Glob), 874 TokenType.ILIKE: binary_range_parser(exp.ILike), 875 TokenType.IN: lambda self, this: self._parse_in(this), 876 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 877 TokenType.IS: lambda self, this: self._parse_is(this), 878 TokenType.LIKE: binary_range_parser(exp.Like), 879 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 880 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 881 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 882 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 883 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 884 } 885 886 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 887 "ALLOWED_VALUES": lambda self: self.expression( 888 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 889 ), 890 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 891 "AUTO": lambda self: self._parse_auto_property(), 892 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 893 "BACKUP": lambda self: self.expression( 894 exp.BackupProperty, this=self._parse_var(any_token=True) 895 ), 896 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 897 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 898 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 899 "CHECKSUM": lambda self: self._parse_checksum(), 900 "CLUSTER BY": lambda self: self._parse_cluster(), 901 "CLUSTERED": lambda self: self._parse_clustered_by(), 902 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 903 exp.CollateProperty, **kwargs 904 ), 905 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 906 "CONTAINS": lambda self: self._parse_contains_property(), 907 "COPY": lambda self: self._parse_copy_property(), 908 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 909 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 910 "DEFINER": lambda self: self._parse_definer(), 911 "DETERMINISTIC": lambda self: self.expression( 912 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 913 ), 914 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 915 "DUPLICATE": lambda self: self._parse_duplicate(), 916 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 917 "DISTKEY": lambda self: self._parse_distkey(), 918 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 919 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 920 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 921 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 922 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 923 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 924 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 925 "FREESPACE": lambda self: self._parse_freespace(), 926 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 927 "HEAP": lambda self: self.expression(exp.HeapProperty), 928 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 929 "IMMUTABLE": lambda self: self.expression( 930 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 931 ), 932 "INHERITS": lambda self: self.expression( 933 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 934 ), 935 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 936 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 937 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 938 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 939 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 940 "LIKE": lambda self: self._parse_create_like(), 941 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 942 "LOCK": lambda self: self._parse_locking(), 943 "LOCKING": lambda self: self._parse_locking(), 944 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 945 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 946 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 947 "MODIFIES": lambda self: self._parse_modifies_property(), 948 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 949 "NO": lambda self: self._parse_no_property(), 950 "ON": lambda self: self._parse_on_property(), 951 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 952 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 953 "PARTITION": lambda self: self._parse_partitioned_of(), 954 "PARTITION BY": lambda self: self._parse_partitioned_by(), 955 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 956 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 957 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 958 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 959 "READS": lambda self: self._parse_reads_property(), 960 "REMOTE": lambda self: self._parse_remote_with_connection(), 961 "RETURNS": lambda self: self._parse_returns(), 962 "STRICT": lambda self: self.expression(exp.StrictProperty), 963 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 964 "ROW": lambda self: self._parse_row(), 965 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 966 "SAMPLE": lambda self: self.expression( 967 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 968 ), 969 "SECURE": lambda self: self.expression(exp.SecureProperty), 970 "SECURITY": lambda self: self._parse_security(), 971 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 972 "SETTINGS": lambda self: self._parse_settings_property(), 973 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 974 "SORTKEY": lambda self: self._parse_sortkey(), 975 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 976 "STABLE": lambda self: self.expression( 977 exp.StabilityProperty, this=exp.Literal.string("STABLE") 978 ), 979 "STORED": lambda self: self._parse_stored(), 980 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 981 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 982 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 983 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 984 "TO": lambda self: self._parse_to_table(), 985 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 986 "TRANSFORM": lambda self: self.expression( 987 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 988 ), 989 "TTL": lambda self: self._parse_ttl(), 990 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 991 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 992 "VOLATILE": lambda self: self._parse_volatile_property(), 993 "WITH": lambda self: self._parse_with_property(), 994 } 995 996 CONSTRAINT_PARSERS = { 997 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 998 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 999 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1000 "CHARACTER SET": lambda self: self.expression( 1001 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1002 ), 1003 "CHECK": lambda self: self.expression( 1004 exp.CheckColumnConstraint, 1005 this=self._parse_wrapped(self._parse_assignment), 1006 enforced=self._match_text_seq("ENFORCED"), 1007 ), 1008 "COLLATE": lambda self: self.expression( 1009 exp.CollateColumnConstraint, 1010 this=self._parse_identifier() or self._parse_column(), 1011 ), 1012 "COMMENT": lambda self: self.expression( 1013 exp.CommentColumnConstraint, this=self._parse_string() 1014 ), 1015 "COMPRESS": lambda self: self._parse_compress(), 1016 "CLUSTERED": lambda self: self.expression( 1017 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1018 ), 1019 "NONCLUSTERED": lambda self: self.expression( 1020 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1021 ), 1022 "DEFAULT": lambda self: self.expression( 1023 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1024 ), 1025 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1026 "EPHEMERAL": lambda self: self.expression( 1027 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1028 ), 1029 "EXCLUDE": lambda self: self.expression( 1030 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1031 ), 1032 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1033 "FORMAT": lambda self: self.expression( 1034 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1035 ), 1036 "GENERATED": lambda self: self._parse_generated_as_identity(), 1037 "IDENTITY": lambda self: self._parse_auto_increment(), 1038 "INLINE": lambda self: self._parse_inline(), 1039 "LIKE": lambda self: self._parse_create_like(), 1040 "NOT": lambda self: self._parse_not_constraint(), 1041 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1042 "ON": lambda self: ( 1043 self._match(TokenType.UPDATE) 1044 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1045 ) 1046 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1047 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1048 "PERIOD": lambda self: self._parse_period_for_system_time(), 1049 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1050 "REFERENCES": lambda self: self._parse_references(match=False), 1051 "TITLE": lambda self: self.expression( 1052 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1053 ), 1054 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1055 "UNIQUE": lambda self: self._parse_unique(), 1056 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1057 "WITH": lambda self: self.expression( 1058 exp.Properties, expressions=self._parse_wrapped_properties() 1059 ), 1060 } 1061 1062 ALTER_PARSERS = { 1063 "ADD": lambda self: self._parse_alter_table_add(), 1064 "AS": lambda self: self._parse_select(), 1065 "ALTER": lambda self: self._parse_alter_table_alter(), 1066 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1067 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1068 "DROP": lambda self: self._parse_alter_table_drop(), 1069 "RENAME": lambda self: self._parse_alter_table_rename(), 1070 "SET": lambda self: self._parse_alter_table_set(), 1071 "SWAP": lambda self: self.expression( 1072 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1073 ), 1074 } 1075 1076 ALTER_ALTER_PARSERS = { 1077 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1078 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1079 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1080 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1081 } 1082 1083 SCHEMA_UNNAMED_CONSTRAINTS = { 1084 "CHECK", 1085 "EXCLUDE", 1086 "FOREIGN KEY", 1087 "LIKE", 1088 "PERIOD", 1089 "PRIMARY KEY", 1090 "UNIQUE", 1091 } 1092 1093 NO_PAREN_FUNCTION_PARSERS = { 1094 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1095 "CASE": lambda self: self._parse_case(), 1096 "CONNECT_BY_ROOT": lambda self: self.expression( 1097 exp.ConnectByRoot, this=self._parse_column() 1098 ), 1099 "IF": lambda self: self._parse_if(), 1100 "NEXT": lambda self: self._parse_next_value_for(), 1101 } 1102 1103 INVALID_FUNC_NAME_TOKENS = { 1104 TokenType.IDENTIFIER, 1105 TokenType.STRING, 1106 } 1107 1108 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1109 1110 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1111 1112 FUNCTION_PARSERS = { 1113 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1114 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1115 "DECODE": lambda self: self._parse_decode(), 1116 "EXTRACT": lambda self: self._parse_extract(), 1117 "GAP_FILL": lambda self: self._parse_gap_fill(), 1118 "JSON_OBJECT": lambda self: self._parse_json_object(), 1119 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1120 "JSON_TABLE": lambda self: self._parse_json_table(), 1121 "MATCH": lambda self: self._parse_match_against(), 1122 "NORMALIZE": lambda self: self._parse_normalize(), 1123 "OPENJSON": lambda self: self._parse_open_json(), 1124 "OVERLAY": lambda self: self._parse_overlay(), 1125 "POSITION": lambda self: self._parse_position(), 1126 "PREDICT": lambda self: self._parse_predict(), 1127 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1128 "STRING_AGG": lambda self: self._parse_string_agg(), 1129 "SUBSTRING": lambda self: self._parse_substring(), 1130 "TRIM": lambda self: self._parse_trim(), 1131 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1132 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1133 } 1134 1135 QUERY_MODIFIER_PARSERS = { 1136 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1137 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1138 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1139 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1140 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1141 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1142 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1143 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1144 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1145 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1146 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1147 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1148 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1149 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1150 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1151 TokenType.CLUSTER_BY: lambda self: ( 1152 "cluster", 1153 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1154 ), 1155 TokenType.DISTRIBUTE_BY: lambda self: ( 1156 "distribute", 1157 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1158 ), 1159 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1160 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1161 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1162 } 1163 1164 SET_PARSERS = { 1165 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1166 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1167 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1168 "TRANSACTION": lambda self: self._parse_set_transaction(), 1169 } 1170 1171 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1172 1173 TYPE_LITERAL_PARSERS = { 1174 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1175 } 1176 1177 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1178 1179 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1180 1181 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1182 1183 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1184 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1185 "ISOLATION": ( 1186 ("LEVEL", "REPEATABLE", "READ"), 1187 ("LEVEL", "READ", "COMMITTED"), 1188 ("LEVEL", "READ", "UNCOMITTED"), 1189 ("LEVEL", "SERIALIZABLE"), 1190 ), 1191 "READ": ("WRITE", "ONLY"), 1192 } 1193 1194 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1195 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1196 ) 1197 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1198 1199 CREATE_SEQUENCE: OPTIONS_TYPE = { 1200 "SCALE": ("EXTEND", "NOEXTEND"), 1201 "SHARD": ("EXTEND", "NOEXTEND"), 1202 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1203 **dict.fromkeys( 1204 ( 1205 "SESSION", 1206 "GLOBAL", 1207 "KEEP", 1208 "NOKEEP", 1209 "ORDER", 1210 "NOORDER", 1211 "NOCACHE", 1212 "CYCLE", 1213 "NOCYCLE", 1214 "NOMINVALUE", 1215 "NOMAXVALUE", 1216 "NOSCALE", 1217 "NOSHARD", 1218 ), 1219 tuple(), 1220 ), 1221 } 1222 1223 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1224 1225 USABLES: OPTIONS_TYPE = dict.fromkeys( 1226 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1227 ) 1228 1229 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1230 1231 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1232 "TYPE": ("EVOLUTION",), 1233 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1234 } 1235 1236 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1237 1238 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1239 1240 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1241 "NOT": ("ENFORCED",), 1242 "MATCH": ( 1243 "FULL", 1244 "PARTIAL", 1245 "SIMPLE", 1246 ), 1247 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1248 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1249 } 1250 1251 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1252 1253 CLONE_KEYWORDS = {"CLONE", "COPY"} 1254 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1255 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1256 1257 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1258 1259 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1260 1261 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1262 1263 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1264 1265 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1266 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1267 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1268 1269 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1270 1271 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1272 1273 ADD_CONSTRAINT_TOKENS = { 1274 TokenType.CONSTRAINT, 1275 TokenType.FOREIGN_KEY, 1276 TokenType.INDEX, 1277 TokenType.KEY, 1278 TokenType.PRIMARY_KEY, 1279 TokenType.UNIQUE, 1280 } 1281 1282 DISTINCT_TOKENS = {TokenType.DISTINCT} 1283 1284 NULL_TOKENS = {TokenType.NULL} 1285 1286 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1287 1288 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1289 1290 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1291 1292 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1293 1294 ODBC_DATETIME_LITERALS = { 1295 "d": exp.Date, 1296 "t": exp.Time, 1297 "ts": exp.Timestamp, 1298 } 1299 1300 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1301 1302 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1303 1304 # The style options for the DESCRIBE statement 1305 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1306 1307 OPERATION_MODIFIERS: t.Set[str] = set() 1308 1309 STRICT_CAST = True 1310 1311 PREFIXED_PIVOT_COLUMNS = False 1312 IDENTIFY_PIVOT_STRINGS = False 1313 1314 LOG_DEFAULTS_TO_LN = False 1315 1316 # Whether ADD is present for each column added by ALTER TABLE 1317 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1318 1319 # Whether the table sample clause expects CSV syntax 1320 TABLESAMPLE_CSV = False 1321 1322 # The default method used for table sampling 1323 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1324 1325 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1326 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1327 1328 # Whether the TRIM function expects the characters to trim as its first argument 1329 TRIM_PATTERN_FIRST = False 1330 1331 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1332 STRING_ALIASES = False 1333 1334 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1335 MODIFIERS_ATTACHED_TO_SET_OP = True 1336 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1337 1338 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1339 NO_PAREN_IF_COMMANDS = True 1340 1341 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1342 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1343 1344 # Whether the `:` operator is used to extract a value from a VARIANT column 1345 COLON_IS_VARIANT_EXTRACT = False 1346 1347 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1348 # If this is True and '(' is not found, the keyword will be treated as an identifier 1349 VALUES_FOLLOWED_BY_PAREN = True 1350 1351 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1352 SUPPORTS_IMPLICIT_UNNEST = False 1353 1354 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1355 INTERVAL_SPANS = True 1356 1357 # Whether a PARTITION clause can follow a table reference 1358 SUPPORTS_PARTITION_SELECTION = False 1359 1360 __slots__ = ( 1361 "error_level", 1362 "error_message_context", 1363 "max_errors", 1364 "dialect", 1365 "sql", 1366 "errors", 1367 "_tokens", 1368 "_index", 1369 "_curr", 1370 "_next", 1371 "_prev", 1372 "_prev_comments", 1373 ) 1374 1375 # Autofilled 1376 SHOW_TRIE: t.Dict = {} 1377 SET_TRIE: t.Dict = {} 1378 1379 def __init__( 1380 self, 1381 error_level: t.Optional[ErrorLevel] = None, 1382 error_message_context: int = 100, 1383 max_errors: int = 3, 1384 dialect: DialectType = None, 1385 ): 1386 from sqlglot.dialects import Dialect 1387 1388 self.error_level = error_level or ErrorLevel.IMMEDIATE 1389 self.error_message_context = error_message_context 1390 self.max_errors = max_errors 1391 self.dialect = Dialect.get_or_raise(dialect) 1392 self.reset() 1393 1394 def reset(self): 1395 self.sql = "" 1396 self.errors = [] 1397 self._tokens = [] 1398 self._index = 0 1399 self._curr = None 1400 self._next = None 1401 self._prev = None 1402 self._prev_comments = None 1403 1404 def parse( 1405 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1406 ) -> t.List[t.Optional[exp.Expression]]: 1407 """ 1408 Parses a list of tokens and returns a list of syntax trees, one tree 1409 per parsed SQL statement. 1410 1411 Args: 1412 raw_tokens: The list of tokens. 1413 sql: The original SQL string, used to produce helpful debug messages. 1414 1415 Returns: 1416 The list of the produced syntax trees. 1417 """ 1418 return self._parse( 1419 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1420 ) 1421 1422 def parse_into( 1423 self, 1424 expression_types: exp.IntoType, 1425 raw_tokens: t.List[Token], 1426 sql: t.Optional[str] = None, 1427 ) -> t.List[t.Optional[exp.Expression]]: 1428 """ 1429 Parses a list of tokens into a given Expression type. If a collection of Expression 1430 types is given instead, this method will try to parse the token list into each one 1431 of them, stopping at the first for which the parsing succeeds. 1432 1433 Args: 1434 expression_types: The expression type(s) to try and parse the token list into. 1435 raw_tokens: The list of tokens. 1436 sql: The original SQL string, used to produce helpful debug messages. 1437 1438 Returns: 1439 The target Expression. 1440 """ 1441 errors = [] 1442 for expression_type in ensure_list(expression_types): 1443 parser = self.EXPRESSION_PARSERS.get(expression_type) 1444 if not parser: 1445 raise TypeError(f"No parser registered for {expression_type}") 1446 1447 try: 1448 return self._parse(parser, raw_tokens, sql) 1449 except ParseError as e: 1450 e.errors[0]["into_expression"] = expression_type 1451 errors.append(e) 1452 1453 raise ParseError( 1454 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1455 errors=merge_errors(errors), 1456 ) from errors[-1] 1457 1458 def _parse( 1459 self, 1460 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1461 raw_tokens: t.List[Token], 1462 sql: t.Optional[str] = None, 1463 ) -> t.List[t.Optional[exp.Expression]]: 1464 self.reset() 1465 self.sql = sql or "" 1466 1467 total = len(raw_tokens) 1468 chunks: t.List[t.List[Token]] = [[]] 1469 1470 for i, token in enumerate(raw_tokens): 1471 if token.token_type == TokenType.SEMICOLON: 1472 if token.comments: 1473 chunks.append([token]) 1474 1475 if i < total - 1: 1476 chunks.append([]) 1477 else: 1478 chunks[-1].append(token) 1479 1480 expressions = [] 1481 1482 for tokens in chunks: 1483 self._index = -1 1484 self._tokens = tokens 1485 self._advance() 1486 1487 expressions.append(parse_method(self)) 1488 1489 if self._index < len(self._tokens): 1490 self.raise_error("Invalid expression / Unexpected token") 1491 1492 self.check_errors() 1493 1494 return expressions 1495 1496 def check_errors(self) -> None: 1497 """Logs or raises any found errors, depending on the chosen error level setting.""" 1498 if self.error_level == ErrorLevel.WARN: 1499 for error in self.errors: 1500 logger.error(str(error)) 1501 elif self.error_level == ErrorLevel.RAISE and self.errors: 1502 raise ParseError( 1503 concat_messages(self.errors, self.max_errors), 1504 errors=merge_errors(self.errors), 1505 ) 1506 1507 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1508 """ 1509 Appends an error in the list of recorded errors or raises it, depending on the chosen 1510 error level setting. 1511 """ 1512 token = token or self._curr or self._prev or Token.string("") 1513 start = token.start 1514 end = token.end + 1 1515 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1516 highlight = self.sql[start:end] 1517 end_context = self.sql[end : end + self.error_message_context] 1518 1519 error = ParseError.new( 1520 f"{message}. Line {token.line}, Col: {token.col}.\n" 1521 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1522 description=message, 1523 line=token.line, 1524 col=token.col, 1525 start_context=start_context, 1526 highlight=highlight, 1527 end_context=end_context, 1528 ) 1529 1530 if self.error_level == ErrorLevel.IMMEDIATE: 1531 raise error 1532 1533 self.errors.append(error) 1534 1535 def expression( 1536 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1537 ) -> E: 1538 """ 1539 Creates a new, validated Expression. 1540 1541 Args: 1542 exp_class: The expression class to instantiate. 1543 comments: An optional list of comments to attach to the expression. 1544 kwargs: The arguments to set for the expression along with their respective values. 1545 1546 Returns: 1547 The target expression. 1548 """ 1549 instance = exp_class(**kwargs) 1550 instance.add_comments(comments) if comments else self._add_comments(instance) 1551 return self.validate_expression(instance) 1552 1553 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1554 if expression and self._prev_comments: 1555 expression.add_comments(self._prev_comments) 1556 self._prev_comments = None 1557 1558 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1559 """ 1560 Validates an Expression, making sure that all its mandatory arguments are set. 1561 1562 Args: 1563 expression: The expression to validate. 1564 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1565 1566 Returns: 1567 The validated expression. 1568 """ 1569 if self.error_level != ErrorLevel.IGNORE: 1570 for error_message in expression.error_messages(args): 1571 self.raise_error(error_message) 1572 1573 return expression 1574 1575 def _find_sql(self, start: Token, end: Token) -> str: 1576 return self.sql[start.start : end.end + 1] 1577 1578 def _is_connected(self) -> bool: 1579 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1580 1581 def _advance(self, times: int = 1) -> None: 1582 self._index += times 1583 self._curr = seq_get(self._tokens, self._index) 1584 self._next = seq_get(self._tokens, self._index + 1) 1585 1586 if self._index > 0: 1587 self._prev = self._tokens[self._index - 1] 1588 self._prev_comments = self._prev.comments 1589 else: 1590 self._prev = None 1591 self._prev_comments = None 1592 1593 def _retreat(self, index: int) -> None: 1594 if index != self._index: 1595 self._advance(index - self._index) 1596 1597 def _warn_unsupported(self) -> None: 1598 if len(self._tokens) <= 1: 1599 return 1600 1601 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1602 # interested in emitting a warning for the one being currently processed. 1603 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1604 1605 logger.warning( 1606 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1607 ) 1608 1609 def _parse_command(self) -> exp.Command: 1610 self._warn_unsupported() 1611 return self.expression( 1612 exp.Command, 1613 comments=self._prev_comments, 1614 this=self._prev.text.upper(), 1615 expression=self._parse_string(), 1616 ) 1617 1618 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1619 """ 1620 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1621 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1622 solve this by setting & resetting the parser state accordingly 1623 """ 1624 index = self._index 1625 error_level = self.error_level 1626 1627 self.error_level = ErrorLevel.IMMEDIATE 1628 try: 1629 this = parse_method() 1630 except ParseError: 1631 this = None 1632 finally: 1633 if not this or retreat: 1634 self._retreat(index) 1635 self.error_level = error_level 1636 1637 return this 1638 1639 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1640 start = self._prev 1641 exists = self._parse_exists() if allow_exists else None 1642 1643 self._match(TokenType.ON) 1644 1645 materialized = self._match_text_seq("MATERIALIZED") 1646 kind = self._match_set(self.CREATABLES) and self._prev 1647 if not kind: 1648 return self._parse_as_command(start) 1649 1650 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1651 this = self._parse_user_defined_function(kind=kind.token_type) 1652 elif kind.token_type == TokenType.TABLE: 1653 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1654 elif kind.token_type == TokenType.COLUMN: 1655 this = self._parse_column() 1656 else: 1657 this = self._parse_id_var() 1658 1659 self._match(TokenType.IS) 1660 1661 return self.expression( 1662 exp.Comment, 1663 this=this, 1664 kind=kind.text, 1665 expression=self._parse_string(), 1666 exists=exists, 1667 materialized=materialized, 1668 ) 1669 1670 def _parse_to_table( 1671 self, 1672 ) -> exp.ToTableProperty: 1673 table = self._parse_table_parts(schema=True) 1674 return self.expression(exp.ToTableProperty, this=table) 1675 1676 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1677 def _parse_ttl(self) -> exp.Expression: 1678 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1679 this = self._parse_bitwise() 1680 1681 if self._match_text_seq("DELETE"): 1682 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1683 if self._match_text_seq("RECOMPRESS"): 1684 return self.expression( 1685 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1686 ) 1687 if self._match_text_seq("TO", "DISK"): 1688 return self.expression( 1689 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1690 ) 1691 if self._match_text_seq("TO", "VOLUME"): 1692 return self.expression( 1693 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1694 ) 1695 1696 return this 1697 1698 expressions = self._parse_csv(_parse_ttl_action) 1699 where = self._parse_where() 1700 group = self._parse_group() 1701 1702 aggregates = None 1703 if group and self._match(TokenType.SET): 1704 aggregates = self._parse_csv(self._parse_set_item) 1705 1706 return self.expression( 1707 exp.MergeTreeTTL, 1708 expressions=expressions, 1709 where=where, 1710 group=group, 1711 aggregates=aggregates, 1712 ) 1713 1714 def _parse_statement(self) -> t.Optional[exp.Expression]: 1715 if self._curr is None: 1716 return None 1717 1718 if self._match_set(self.STATEMENT_PARSERS): 1719 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1720 1721 if self._match_set(self.dialect.tokenizer.COMMANDS): 1722 return self._parse_command() 1723 1724 expression = self._parse_expression() 1725 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1726 return self._parse_query_modifiers(expression) 1727 1728 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1729 start = self._prev 1730 temporary = self._match(TokenType.TEMPORARY) 1731 materialized = self._match_text_seq("MATERIALIZED") 1732 1733 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1734 if not kind: 1735 return self._parse_as_command(start) 1736 1737 concurrently = self._match_text_seq("CONCURRENTLY") 1738 if_exists = exists or self._parse_exists() 1739 table = self._parse_table_parts( 1740 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1741 ) 1742 1743 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1744 1745 if self._match(TokenType.L_PAREN, advance=False): 1746 expressions = self._parse_wrapped_csv(self._parse_types) 1747 else: 1748 expressions = None 1749 1750 return self.expression( 1751 exp.Drop, 1752 comments=start.comments, 1753 exists=if_exists, 1754 this=table, 1755 expressions=expressions, 1756 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1757 temporary=temporary, 1758 materialized=materialized, 1759 cascade=self._match_text_seq("CASCADE"), 1760 constraints=self._match_text_seq("CONSTRAINTS"), 1761 purge=self._match_text_seq("PURGE"), 1762 cluster=cluster, 1763 concurrently=concurrently, 1764 ) 1765 1766 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1767 return ( 1768 self._match_text_seq("IF") 1769 and (not not_ or self._match(TokenType.NOT)) 1770 and self._match(TokenType.EXISTS) 1771 ) 1772 1773 def _parse_create(self) -> exp.Create | exp.Command: 1774 # Note: this can't be None because we've matched a statement parser 1775 start = self._prev 1776 comments = self._prev_comments 1777 1778 replace = ( 1779 start.token_type == TokenType.REPLACE 1780 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1781 or self._match_pair(TokenType.OR, TokenType.ALTER) 1782 ) 1783 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1784 1785 unique = self._match(TokenType.UNIQUE) 1786 1787 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1788 clustered = True 1789 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1790 "COLUMNSTORE" 1791 ): 1792 clustered = False 1793 else: 1794 clustered = None 1795 1796 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1797 self._advance() 1798 1799 properties = None 1800 create_token = self._match_set(self.CREATABLES) and self._prev 1801 1802 if not create_token: 1803 # exp.Properties.Location.POST_CREATE 1804 properties = self._parse_properties() 1805 create_token = self._match_set(self.CREATABLES) and self._prev 1806 1807 if not properties or not create_token: 1808 return self._parse_as_command(start) 1809 1810 concurrently = self._match_text_seq("CONCURRENTLY") 1811 exists = self._parse_exists(not_=True) 1812 this = None 1813 expression: t.Optional[exp.Expression] = None 1814 indexes = None 1815 no_schema_binding = None 1816 begin = None 1817 end = None 1818 clone = None 1819 1820 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1821 nonlocal properties 1822 if properties and temp_props: 1823 properties.expressions.extend(temp_props.expressions) 1824 elif temp_props: 1825 properties = temp_props 1826 1827 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1828 this = self._parse_user_defined_function(kind=create_token.token_type) 1829 1830 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1831 extend_props(self._parse_properties()) 1832 1833 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1834 extend_props(self._parse_properties()) 1835 1836 if not expression: 1837 if self._match(TokenType.COMMAND): 1838 expression = self._parse_as_command(self._prev) 1839 else: 1840 begin = self._match(TokenType.BEGIN) 1841 return_ = self._match_text_seq("RETURN") 1842 1843 if self._match(TokenType.STRING, advance=False): 1844 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1845 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1846 expression = self._parse_string() 1847 extend_props(self._parse_properties()) 1848 else: 1849 expression = self._parse_user_defined_function_expression() 1850 1851 end = self._match_text_seq("END") 1852 1853 if return_: 1854 expression = self.expression(exp.Return, this=expression) 1855 elif create_token.token_type == TokenType.INDEX: 1856 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1857 if not self._match(TokenType.ON): 1858 index = self._parse_id_var() 1859 anonymous = False 1860 else: 1861 index = None 1862 anonymous = True 1863 1864 this = self._parse_index(index=index, anonymous=anonymous) 1865 elif create_token.token_type in self.DB_CREATABLES: 1866 table_parts = self._parse_table_parts( 1867 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1868 ) 1869 1870 # exp.Properties.Location.POST_NAME 1871 self._match(TokenType.COMMA) 1872 extend_props(self._parse_properties(before=True)) 1873 1874 this = self._parse_schema(this=table_parts) 1875 1876 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1877 extend_props(self._parse_properties()) 1878 1879 self._match(TokenType.ALIAS) 1880 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1881 # exp.Properties.Location.POST_ALIAS 1882 extend_props(self._parse_properties()) 1883 1884 if create_token.token_type == TokenType.SEQUENCE: 1885 expression = self._parse_types() 1886 extend_props(self._parse_properties()) 1887 else: 1888 expression = self._parse_ddl_select() 1889 1890 if create_token.token_type == TokenType.TABLE: 1891 # exp.Properties.Location.POST_EXPRESSION 1892 extend_props(self._parse_properties()) 1893 1894 indexes = [] 1895 while True: 1896 index = self._parse_index() 1897 1898 # exp.Properties.Location.POST_INDEX 1899 extend_props(self._parse_properties()) 1900 if not index: 1901 break 1902 else: 1903 self._match(TokenType.COMMA) 1904 indexes.append(index) 1905 elif create_token.token_type == TokenType.VIEW: 1906 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1907 no_schema_binding = True 1908 1909 shallow = self._match_text_seq("SHALLOW") 1910 1911 if self._match_texts(self.CLONE_KEYWORDS): 1912 copy = self._prev.text.lower() == "copy" 1913 clone = self.expression( 1914 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1915 ) 1916 1917 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1918 return self._parse_as_command(start) 1919 1920 create_kind_text = create_token.text.upper() 1921 return self.expression( 1922 exp.Create, 1923 comments=comments, 1924 this=this, 1925 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1926 replace=replace, 1927 refresh=refresh, 1928 unique=unique, 1929 expression=expression, 1930 exists=exists, 1931 properties=properties, 1932 indexes=indexes, 1933 no_schema_binding=no_schema_binding, 1934 begin=begin, 1935 end=end, 1936 clone=clone, 1937 concurrently=concurrently, 1938 clustered=clustered, 1939 ) 1940 1941 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1942 seq = exp.SequenceProperties() 1943 1944 options = [] 1945 index = self._index 1946 1947 while self._curr: 1948 self._match(TokenType.COMMA) 1949 if self._match_text_seq("INCREMENT"): 1950 self._match_text_seq("BY") 1951 self._match_text_seq("=") 1952 seq.set("increment", self._parse_term()) 1953 elif self._match_text_seq("MINVALUE"): 1954 seq.set("minvalue", self._parse_term()) 1955 elif self._match_text_seq("MAXVALUE"): 1956 seq.set("maxvalue", self._parse_term()) 1957 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1958 self._match_text_seq("=") 1959 seq.set("start", self._parse_term()) 1960 elif self._match_text_seq("CACHE"): 1961 # T-SQL allows empty CACHE which is initialized dynamically 1962 seq.set("cache", self._parse_number() or True) 1963 elif self._match_text_seq("OWNED", "BY"): 1964 # "OWNED BY NONE" is the default 1965 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1966 else: 1967 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1968 if opt: 1969 options.append(opt) 1970 else: 1971 break 1972 1973 seq.set("options", options if options else None) 1974 return None if self._index == index else seq 1975 1976 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1977 # only used for teradata currently 1978 self._match(TokenType.COMMA) 1979 1980 kwargs = { 1981 "no": self._match_text_seq("NO"), 1982 "dual": self._match_text_seq("DUAL"), 1983 "before": self._match_text_seq("BEFORE"), 1984 "default": self._match_text_seq("DEFAULT"), 1985 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1986 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1987 "after": self._match_text_seq("AFTER"), 1988 "minimum": self._match_texts(("MIN", "MINIMUM")), 1989 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1990 } 1991 1992 if self._match_texts(self.PROPERTY_PARSERS): 1993 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1994 try: 1995 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1996 except TypeError: 1997 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1998 1999 return None 2000 2001 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2002 return self._parse_wrapped_csv(self._parse_property) 2003 2004 def _parse_property(self) -> t.Optional[exp.Expression]: 2005 if self._match_texts(self.PROPERTY_PARSERS): 2006 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2007 2008 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2009 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2010 2011 if self._match_text_seq("COMPOUND", "SORTKEY"): 2012 return self._parse_sortkey(compound=True) 2013 2014 if self._match_text_seq("SQL", "SECURITY"): 2015 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2016 2017 index = self._index 2018 key = self._parse_column() 2019 2020 if not self._match(TokenType.EQ): 2021 self._retreat(index) 2022 return self._parse_sequence_properties() 2023 2024 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2025 if isinstance(key, exp.Column): 2026 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2027 2028 value = self._parse_bitwise() or self._parse_var(any_token=True) 2029 2030 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2031 if isinstance(value, exp.Column): 2032 value = exp.var(value.name) 2033 2034 return self.expression(exp.Property, this=key, value=value) 2035 2036 def _parse_stored(self) -> exp.FileFormatProperty: 2037 self._match(TokenType.ALIAS) 2038 2039 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2040 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2041 2042 return self.expression( 2043 exp.FileFormatProperty, 2044 this=( 2045 self.expression( 2046 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2047 ) 2048 if input_format or output_format 2049 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2050 ), 2051 ) 2052 2053 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2054 field = self._parse_field() 2055 if isinstance(field, exp.Identifier) and not field.quoted: 2056 field = exp.var(field) 2057 2058 return field 2059 2060 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2061 self._match(TokenType.EQ) 2062 self._match(TokenType.ALIAS) 2063 2064 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2065 2066 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2067 properties = [] 2068 while True: 2069 if before: 2070 prop = self._parse_property_before() 2071 else: 2072 prop = self._parse_property() 2073 if not prop: 2074 break 2075 for p in ensure_list(prop): 2076 properties.append(p) 2077 2078 if properties: 2079 return self.expression(exp.Properties, expressions=properties) 2080 2081 return None 2082 2083 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2084 return self.expression( 2085 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2086 ) 2087 2088 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2089 if self._match_texts(("DEFINER", "INVOKER")): 2090 security_specifier = self._prev.text.upper() 2091 return self.expression(exp.SecurityProperty, this=security_specifier) 2092 return None 2093 2094 def _parse_settings_property(self) -> exp.SettingsProperty: 2095 return self.expression( 2096 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2097 ) 2098 2099 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2100 if self._index >= 2: 2101 pre_volatile_token = self._tokens[self._index - 2] 2102 else: 2103 pre_volatile_token = None 2104 2105 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2106 return exp.VolatileProperty() 2107 2108 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2109 2110 def _parse_retention_period(self) -> exp.Var: 2111 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2112 number = self._parse_number() 2113 number_str = f"{number} " if number else "" 2114 unit = self._parse_var(any_token=True) 2115 return exp.var(f"{number_str}{unit}") 2116 2117 def _parse_system_versioning_property( 2118 self, with_: bool = False 2119 ) -> exp.WithSystemVersioningProperty: 2120 self._match(TokenType.EQ) 2121 prop = self.expression( 2122 exp.WithSystemVersioningProperty, 2123 **{ # type: ignore 2124 "on": True, 2125 "with": with_, 2126 }, 2127 ) 2128 2129 if self._match_text_seq("OFF"): 2130 prop.set("on", False) 2131 return prop 2132 2133 self._match(TokenType.ON) 2134 if self._match(TokenType.L_PAREN): 2135 while self._curr and not self._match(TokenType.R_PAREN): 2136 if self._match_text_seq("HISTORY_TABLE", "="): 2137 prop.set("this", self._parse_table_parts()) 2138 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2139 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2140 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2141 prop.set("retention_period", self._parse_retention_period()) 2142 2143 self._match(TokenType.COMMA) 2144 2145 return prop 2146 2147 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2148 self._match(TokenType.EQ) 2149 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2150 prop = self.expression(exp.DataDeletionProperty, on=on) 2151 2152 if self._match(TokenType.L_PAREN): 2153 while self._curr and not self._match(TokenType.R_PAREN): 2154 if self._match_text_seq("FILTER_COLUMN", "="): 2155 prop.set("filter_column", self._parse_column()) 2156 elif self._match_text_seq("RETENTION_PERIOD", "="): 2157 prop.set("retention_period", self._parse_retention_period()) 2158 2159 self._match(TokenType.COMMA) 2160 2161 return prop 2162 2163 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2164 kind = "HASH" 2165 expressions: t.Optional[t.List[exp.Expression]] = None 2166 if self._match_text_seq("BY", "HASH"): 2167 expressions = self._parse_wrapped_csv(self._parse_id_var) 2168 elif self._match_text_seq("BY", "RANDOM"): 2169 kind = "RANDOM" 2170 2171 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2172 buckets: t.Optional[exp.Expression] = None 2173 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2174 buckets = self._parse_number() 2175 2176 return self.expression( 2177 exp.DistributedByProperty, 2178 expressions=expressions, 2179 kind=kind, 2180 buckets=buckets, 2181 order=self._parse_order(), 2182 ) 2183 2184 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2185 self._match_text_seq("KEY") 2186 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2187 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2188 2189 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2190 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2191 prop = self._parse_system_versioning_property(with_=True) 2192 self._match_r_paren() 2193 return prop 2194 2195 if self._match(TokenType.L_PAREN, advance=False): 2196 return self._parse_wrapped_properties() 2197 2198 if self._match_text_seq("JOURNAL"): 2199 return self._parse_withjournaltable() 2200 2201 if self._match_texts(self.VIEW_ATTRIBUTES): 2202 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2203 2204 if self._match_text_seq("DATA"): 2205 return self._parse_withdata(no=False) 2206 elif self._match_text_seq("NO", "DATA"): 2207 return self._parse_withdata(no=True) 2208 2209 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2210 return self._parse_serde_properties(with_=True) 2211 2212 if self._match(TokenType.SCHEMA): 2213 return self.expression( 2214 exp.WithSchemaBindingProperty, 2215 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2216 ) 2217 2218 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2219 return self.expression( 2220 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2221 ) 2222 2223 if not self._next: 2224 return None 2225 2226 return self._parse_withisolatedloading() 2227 2228 def _parse_procedure_option(self) -> exp.Expression | None: 2229 if self._match_text_seq("EXECUTE", "AS"): 2230 return self.expression( 2231 exp.ExecuteAsProperty, 2232 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2233 or self._parse_string(), 2234 ) 2235 2236 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2237 2238 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2239 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2240 self._match(TokenType.EQ) 2241 2242 user = self._parse_id_var() 2243 self._match(TokenType.PARAMETER) 2244 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2245 2246 if not user or not host: 2247 return None 2248 2249 return exp.DefinerProperty(this=f"{user}@{host}") 2250 2251 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2252 self._match(TokenType.TABLE) 2253 self._match(TokenType.EQ) 2254 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2255 2256 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2257 return self.expression(exp.LogProperty, no=no) 2258 2259 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2260 return self.expression(exp.JournalProperty, **kwargs) 2261 2262 def _parse_checksum(self) -> exp.ChecksumProperty: 2263 self._match(TokenType.EQ) 2264 2265 on = None 2266 if self._match(TokenType.ON): 2267 on = True 2268 elif self._match_text_seq("OFF"): 2269 on = False 2270 2271 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2272 2273 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2274 return self.expression( 2275 exp.Cluster, 2276 expressions=( 2277 self._parse_wrapped_csv(self._parse_ordered) 2278 if wrapped 2279 else self._parse_csv(self._parse_ordered) 2280 ), 2281 ) 2282 2283 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2284 self._match_text_seq("BY") 2285 2286 self._match_l_paren() 2287 expressions = self._parse_csv(self._parse_column) 2288 self._match_r_paren() 2289 2290 if self._match_text_seq("SORTED", "BY"): 2291 self._match_l_paren() 2292 sorted_by = self._parse_csv(self._parse_ordered) 2293 self._match_r_paren() 2294 else: 2295 sorted_by = None 2296 2297 self._match(TokenType.INTO) 2298 buckets = self._parse_number() 2299 self._match_text_seq("BUCKETS") 2300 2301 return self.expression( 2302 exp.ClusteredByProperty, 2303 expressions=expressions, 2304 sorted_by=sorted_by, 2305 buckets=buckets, 2306 ) 2307 2308 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2309 if not self._match_text_seq("GRANTS"): 2310 self._retreat(self._index - 1) 2311 return None 2312 2313 return self.expression(exp.CopyGrantsProperty) 2314 2315 def _parse_freespace(self) -> exp.FreespaceProperty: 2316 self._match(TokenType.EQ) 2317 return self.expression( 2318 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2319 ) 2320 2321 def _parse_mergeblockratio( 2322 self, no: bool = False, default: bool = False 2323 ) -> exp.MergeBlockRatioProperty: 2324 if self._match(TokenType.EQ): 2325 return self.expression( 2326 exp.MergeBlockRatioProperty, 2327 this=self._parse_number(), 2328 percent=self._match(TokenType.PERCENT), 2329 ) 2330 2331 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2332 2333 def _parse_datablocksize( 2334 self, 2335 default: t.Optional[bool] = None, 2336 minimum: t.Optional[bool] = None, 2337 maximum: t.Optional[bool] = None, 2338 ) -> exp.DataBlocksizeProperty: 2339 self._match(TokenType.EQ) 2340 size = self._parse_number() 2341 2342 units = None 2343 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2344 units = self._prev.text 2345 2346 return self.expression( 2347 exp.DataBlocksizeProperty, 2348 size=size, 2349 units=units, 2350 default=default, 2351 minimum=minimum, 2352 maximum=maximum, 2353 ) 2354 2355 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2356 self._match(TokenType.EQ) 2357 always = self._match_text_seq("ALWAYS") 2358 manual = self._match_text_seq("MANUAL") 2359 never = self._match_text_seq("NEVER") 2360 default = self._match_text_seq("DEFAULT") 2361 2362 autotemp = None 2363 if self._match_text_seq("AUTOTEMP"): 2364 autotemp = self._parse_schema() 2365 2366 return self.expression( 2367 exp.BlockCompressionProperty, 2368 always=always, 2369 manual=manual, 2370 never=never, 2371 default=default, 2372 autotemp=autotemp, 2373 ) 2374 2375 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2376 index = self._index 2377 no = self._match_text_seq("NO") 2378 concurrent = self._match_text_seq("CONCURRENT") 2379 2380 if not self._match_text_seq("ISOLATED", "LOADING"): 2381 self._retreat(index) 2382 return None 2383 2384 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2385 return self.expression( 2386 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2387 ) 2388 2389 def _parse_locking(self) -> exp.LockingProperty: 2390 if self._match(TokenType.TABLE): 2391 kind = "TABLE" 2392 elif self._match(TokenType.VIEW): 2393 kind = "VIEW" 2394 elif self._match(TokenType.ROW): 2395 kind = "ROW" 2396 elif self._match_text_seq("DATABASE"): 2397 kind = "DATABASE" 2398 else: 2399 kind = None 2400 2401 if kind in ("DATABASE", "TABLE", "VIEW"): 2402 this = self._parse_table_parts() 2403 else: 2404 this = None 2405 2406 if self._match(TokenType.FOR): 2407 for_or_in = "FOR" 2408 elif self._match(TokenType.IN): 2409 for_or_in = "IN" 2410 else: 2411 for_or_in = None 2412 2413 if self._match_text_seq("ACCESS"): 2414 lock_type = "ACCESS" 2415 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2416 lock_type = "EXCLUSIVE" 2417 elif self._match_text_seq("SHARE"): 2418 lock_type = "SHARE" 2419 elif self._match_text_seq("READ"): 2420 lock_type = "READ" 2421 elif self._match_text_seq("WRITE"): 2422 lock_type = "WRITE" 2423 elif self._match_text_seq("CHECKSUM"): 2424 lock_type = "CHECKSUM" 2425 else: 2426 lock_type = None 2427 2428 override = self._match_text_seq("OVERRIDE") 2429 2430 return self.expression( 2431 exp.LockingProperty, 2432 this=this, 2433 kind=kind, 2434 for_or_in=for_or_in, 2435 lock_type=lock_type, 2436 override=override, 2437 ) 2438 2439 def _parse_partition_by(self) -> t.List[exp.Expression]: 2440 if self._match(TokenType.PARTITION_BY): 2441 return self._parse_csv(self._parse_assignment) 2442 return [] 2443 2444 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2445 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2446 if self._match_text_seq("MINVALUE"): 2447 return exp.var("MINVALUE") 2448 if self._match_text_seq("MAXVALUE"): 2449 return exp.var("MAXVALUE") 2450 return self._parse_bitwise() 2451 2452 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2453 expression = None 2454 from_expressions = None 2455 to_expressions = None 2456 2457 if self._match(TokenType.IN): 2458 this = self._parse_wrapped_csv(self._parse_bitwise) 2459 elif self._match(TokenType.FROM): 2460 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2461 self._match_text_seq("TO") 2462 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2463 elif self._match_text_seq("WITH", "(", "MODULUS"): 2464 this = self._parse_number() 2465 self._match_text_seq(",", "REMAINDER") 2466 expression = self._parse_number() 2467 self._match_r_paren() 2468 else: 2469 self.raise_error("Failed to parse partition bound spec.") 2470 2471 return self.expression( 2472 exp.PartitionBoundSpec, 2473 this=this, 2474 expression=expression, 2475 from_expressions=from_expressions, 2476 to_expressions=to_expressions, 2477 ) 2478 2479 # https://www.postgresql.org/docs/current/sql-createtable.html 2480 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2481 if not self._match_text_seq("OF"): 2482 self._retreat(self._index - 1) 2483 return None 2484 2485 this = self._parse_table(schema=True) 2486 2487 if self._match(TokenType.DEFAULT): 2488 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2489 elif self._match_text_seq("FOR", "VALUES"): 2490 expression = self._parse_partition_bound_spec() 2491 else: 2492 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2493 2494 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2495 2496 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2497 self._match(TokenType.EQ) 2498 return self.expression( 2499 exp.PartitionedByProperty, 2500 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2501 ) 2502 2503 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2504 if self._match_text_seq("AND", "STATISTICS"): 2505 statistics = True 2506 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2507 statistics = False 2508 else: 2509 statistics = None 2510 2511 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2512 2513 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2514 if self._match_text_seq("SQL"): 2515 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2516 return None 2517 2518 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2519 if self._match_text_seq("SQL", "DATA"): 2520 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2521 return None 2522 2523 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2524 if self._match_text_seq("PRIMARY", "INDEX"): 2525 return exp.NoPrimaryIndexProperty() 2526 if self._match_text_seq("SQL"): 2527 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2528 return None 2529 2530 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2531 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2532 return exp.OnCommitProperty() 2533 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2534 return exp.OnCommitProperty(delete=True) 2535 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2536 2537 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2538 if self._match_text_seq("SQL", "DATA"): 2539 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2540 return None 2541 2542 def _parse_distkey(self) -> exp.DistKeyProperty: 2543 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2544 2545 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2546 table = self._parse_table(schema=True) 2547 2548 options = [] 2549 while self._match_texts(("INCLUDING", "EXCLUDING")): 2550 this = self._prev.text.upper() 2551 2552 id_var = self._parse_id_var() 2553 if not id_var: 2554 return None 2555 2556 options.append( 2557 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2558 ) 2559 2560 return self.expression(exp.LikeProperty, this=table, expressions=options) 2561 2562 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2563 return self.expression( 2564 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2565 ) 2566 2567 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2568 self._match(TokenType.EQ) 2569 return self.expression( 2570 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2571 ) 2572 2573 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2574 self._match_text_seq("WITH", "CONNECTION") 2575 return self.expression( 2576 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2577 ) 2578 2579 def _parse_returns(self) -> exp.ReturnsProperty: 2580 value: t.Optional[exp.Expression] 2581 null = None 2582 is_table = self._match(TokenType.TABLE) 2583 2584 if is_table: 2585 if self._match(TokenType.LT): 2586 value = self.expression( 2587 exp.Schema, 2588 this="TABLE", 2589 expressions=self._parse_csv(self._parse_struct_types), 2590 ) 2591 if not self._match(TokenType.GT): 2592 self.raise_error("Expecting >") 2593 else: 2594 value = self._parse_schema(exp.var("TABLE")) 2595 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2596 null = True 2597 value = None 2598 else: 2599 value = self._parse_types() 2600 2601 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2602 2603 def _parse_describe(self) -> exp.Describe: 2604 kind = self._match_set(self.CREATABLES) and self._prev.text 2605 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2606 if self._match(TokenType.DOT): 2607 style = None 2608 self._retreat(self._index - 2) 2609 this = self._parse_table(schema=True) 2610 properties = self._parse_properties() 2611 expressions = properties.expressions if properties else None 2612 partition = self._parse_partition() 2613 return self.expression( 2614 exp.Describe, 2615 this=this, 2616 style=style, 2617 kind=kind, 2618 expressions=expressions, 2619 partition=partition, 2620 ) 2621 2622 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2623 kind = self._prev.text.upper() 2624 expressions = [] 2625 2626 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2627 if self._match(TokenType.WHEN): 2628 expression = self._parse_disjunction() 2629 self._match(TokenType.THEN) 2630 else: 2631 expression = None 2632 2633 else_ = self._match(TokenType.ELSE) 2634 2635 if not self._match(TokenType.INTO): 2636 return None 2637 2638 return self.expression( 2639 exp.ConditionalInsert, 2640 this=self.expression( 2641 exp.Insert, 2642 this=self._parse_table(schema=True), 2643 expression=self._parse_derived_table_values(), 2644 ), 2645 expression=expression, 2646 else_=else_, 2647 ) 2648 2649 expression = parse_conditional_insert() 2650 while expression is not None: 2651 expressions.append(expression) 2652 expression = parse_conditional_insert() 2653 2654 return self.expression( 2655 exp.MultitableInserts, 2656 kind=kind, 2657 comments=comments, 2658 expressions=expressions, 2659 source=self._parse_table(), 2660 ) 2661 2662 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2663 comments = ensure_list(self._prev_comments) 2664 hint = self._parse_hint() 2665 overwrite = self._match(TokenType.OVERWRITE) 2666 ignore = self._match(TokenType.IGNORE) 2667 local = self._match_text_seq("LOCAL") 2668 alternative = None 2669 is_function = None 2670 2671 if self._match_text_seq("DIRECTORY"): 2672 this: t.Optional[exp.Expression] = self.expression( 2673 exp.Directory, 2674 this=self._parse_var_or_string(), 2675 local=local, 2676 row_format=self._parse_row_format(match_row=True), 2677 ) 2678 else: 2679 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2680 comments += ensure_list(self._prev_comments) 2681 return self._parse_multitable_inserts(comments) 2682 2683 if self._match(TokenType.OR): 2684 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2685 2686 self._match(TokenType.INTO) 2687 comments += ensure_list(self._prev_comments) 2688 self._match(TokenType.TABLE) 2689 is_function = self._match(TokenType.FUNCTION) 2690 2691 this = ( 2692 self._parse_table(schema=True, parse_partition=True) 2693 if not is_function 2694 else self._parse_function() 2695 ) 2696 2697 returning = self._parse_returning() 2698 2699 return self.expression( 2700 exp.Insert, 2701 comments=comments, 2702 hint=hint, 2703 is_function=is_function, 2704 this=this, 2705 stored=self._match_text_seq("STORED") and self._parse_stored(), 2706 by_name=self._match_text_seq("BY", "NAME"), 2707 exists=self._parse_exists(), 2708 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2709 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2710 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2711 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2712 conflict=self._parse_on_conflict(), 2713 returning=returning or self._parse_returning(), 2714 overwrite=overwrite, 2715 alternative=alternative, 2716 ignore=ignore, 2717 source=self._match(TokenType.TABLE) and self._parse_table(), 2718 ) 2719 2720 def _parse_kill(self) -> exp.Kill: 2721 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2722 2723 return self.expression( 2724 exp.Kill, 2725 this=self._parse_primary(), 2726 kind=kind, 2727 ) 2728 2729 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2730 conflict = self._match_text_seq("ON", "CONFLICT") 2731 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2732 2733 if not conflict and not duplicate: 2734 return None 2735 2736 conflict_keys = None 2737 constraint = None 2738 2739 if conflict: 2740 if self._match_text_seq("ON", "CONSTRAINT"): 2741 constraint = self._parse_id_var() 2742 elif self._match(TokenType.L_PAREN): 2743 conflict_keys = self._parse_csv(self._parse_id_var) 2744 self._match_r_paren() 2745 2746 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2747 if self._prev.token_type == TokenType.UPDATE: 2748 self._match(TokenType.SET) 2749 expressions = self._parse_csv(self._parse_equality) 2750 else: 2751 expressions = None 2752 2753 return self.expression( 2754 exp.OnConflict, 2755 duplicate=duplicate, 2756 expressions=expressions, 2757 action=action, 2758 conflict_keys=conflict_keys, 2759 constraint=constraint, 2760 ) 2761 2762 def _parse_returning(self) -> t.Optional[exp.Returning]: 2763 if not self._match(TokenType.RETURNING): 2764 return None 2765 return self.expression( 2766 exp.Returning, 2767 expressions=self._parse_csv(self._parse_expression), 2768 into=self._match(TokenType.INTO) and self._parse_table_part(), 2769 ) 2770 2771 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2772 if not self._match(TokenType.FORMAT): 2773 return None 2774 return self._parse_row_format() 2775 2776 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2777 index = self._index 2778 with_ = with_ or self._match_text_seq("WITH") 2779 2780 if not self._match(TokenType.SERDE_PROPERTIES): 2781 self._retreat(index) 2782 return None 2783 return self.expression( 2784 exp.SerdeProperties, 2785 **{ # type: ignore 2786 "expressions": self._parse_wrapped_properties(), 2787 "with": with_, 2788 }, 2789 ) 2790 2791 def _parse_row_format( 2792 self, match_row: bool = False 2793 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2794 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2795 return None 2796 2797 if self._match_text_seq("SERDE"): 2798 this = self._parse_string() 2799 2800 serde_properties = self._parse_serde_properties() 2801 2802 return self.expression( 2803 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2804 ) 2805 2806 self._match_text_seq("DELIMITED") 2807 2808 kwargs = {} 2809 2810 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2811 kwargs["fields"] = self._parse_string() 2812 if self._match_text_seq("ESCAPED", "BY"): 2813 kwargs["escaped"] = self._parse_string() 2814 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2815 kwargs["collection_items"] = self._parse_string() 2816 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2817 kwargs["map_keys"] = self._parse_string() 2818 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2819 kwargs["lines"] = self._parse_string() 2820 if self._match_text_seq("NULL", "DEFINED", "AS"): 2821 kwargs["null"] = self._parse_string() 2822 2823 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2824 2825 def _parse_load(self) -> exp.LoadData | exp.Command: 2826 if self._match_text_seq("DATA"): 2827 local = self._match_text_seq("LOCAL") 2828 self._match_text_seq("INPATH") 2829 inpath = self._parse_string() 2830 overwrite = self._match(TokenType.OVERWRITE) 2831 self._match_pair(TokenType.INTO, TokenType.TABLE) 2832 2833 return self.expression( 2834 exp.LoadData, 2835 this=self._parse_table(schema=True), 2836 local=local, 2837 overwrite=overwrite, 2838 inpath=inpath, 2839 partition=self._parse_partition(), 2840 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2841 serde=self._match_text_seq("SERDE") and self._parse_string(), 2842 ) 2843 return self._parse_as_command(self._prev) 2844 2845 def _parse_delete(self) -> exp.Delete: 2846 # This handles MySQL's "Multiple-Table Syntax" 2847 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2848 tables = None 2849 comments = self._prev_comments 2850 if not self._match(TokenType.FROM, advance=False): 2851 tables = self._parse_csv(self._parse_table) or None 2852 2853 returning = self._parse_returning() 2854 2855 return self.expression( 2856 exp.Delete, 2857 comments=comments, 2858 tables=tables, 2859 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2860 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2861 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2862 where=self._parse_where(), 2863 returning=returning or self._parse_returning(), 2864 limit=self._parse_limit(), 2865 ) 2866 2867 def _parse_update(self) -> exp.Update: 2868 comments = self._prev_comments 2869 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2870 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2871 returning = self._parse_returning() 2872 return self.expression( 2873 exp.Update, 2874 comments=comments, 2875 **{ # type: ignore 2876 "this": this, 2877 "expressions": expressions, 2878 "from": self._parse_from(joins=True), 2879 "where": self._parse_where(), 2880 "returning": returning or self._parse_returning(), 2881 "order": self._parse_order(), 2882 "limit": self._parse_limit(), 2883 }, 2884 ) 2885 2886 def _parse_uncache(self) -> exp.Uncache: 2887 if not self._match(TokenType.TABLE): 2888 self.raise_error("Expecting TABLE after UNCACHE") 2889 2890 return self.expression( 2891 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2892 ) 2893 2894 def _parse_cache(self) -> exp.Cache: 2895 lazy = self._match_text_seq("LAZY") 2896 self._match(TokenType.TABLE) 2897 table = self._parse_table(schema=True) 2898 2899 options = [] 2900 if self._match_text_seq("OPTIONS"): 2901 self._match_l_paren() 2902 k = self._parse_string() 2903 self._match(TokenType.EQ) 2904 v = self._parse_string() 2905 options = [k, v] 2906 self._match_r_paren() 2907 2908 self._match(TokenType.ALIAS) 2909 return self.expression( 2910 exp.Cache, 2911 this=table, 2912 lazy=lazy, 2913 options=options, 2914 expression=self._parse_select(nested=True), 2915 ) 2916 2917 def _parse_partition(self) -> t.Optional[exp.Partition]: 2918 if not self._match(TokenType.PARTITION): 2919 return None 2920 2921 return self.expression( 2922 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2923 ) 2924 2925 def _parse_value(self) -> t.Optional[exp.Tuple]: 2926 if self._match(TokenType.L_PAREN): 2927 expressions = self._parse_csv(self._parse_expression) 2928 self._match_r_paren() 2929 return self.expression(exp.Tuple, expressions=expressions) 2930 2931 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2932 expression = self._parse_expression() 2933 if expression: 2934 return self.expression(exp.Tuple, expressions=[expression]) 2935 return None 2936 2937 def _parse_projections(self) -> t.List[exp.Expression]: 2938 return self._parse_expressions() 2939 2940 def _parse_select( 2941 self, 2942 nested: bool = False, 2943 table: bool = False, 2944 parse_subquery_alias: bool = True, 2945 parse_set_operation: bool = True, 2946 ) -> t.Optional[exp.Expression]: 2947 cte = self._parse_with() 2948 2949 if cte: 2950 this = self._parse_statement() 2951 2952 if not this: 2953 self.raise_error("Failed to parse any statement following CTE") 2954 return cte 2955 2956 if "with" in this.arg_types: 2957 this.set("with", cte) 2958 else: 2959 self.raise_error(f"{this.key} does not support CTE") 2960 this = cte 2961 2962 return this 2963 2964 # duckdb supports leading with FROM x 2965 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2966 2967 if self._match(TokenType.SELECT): 2968 comments = self._prev_comments 2969 2970 hint = self._parse_hint() 2971 2972 if self._next and not self._next.token_type == TokenType.DOT: 2973 all_ = self._match(TokenType.ALL) 2974 distinct = self._match_set(self.DISTINCT_TOKENS) 2975 else: 2976 all_, distinct = None, None 2977 2978 kind = ( 2979 self._match(TokenType.ALIAS) 2980 and self._match_texts(("STRUCT", "VALUE")) 2981 and self._prev.text.upper() 2982 ) 2983 2984 if distinct: 2985 distinct = self.expression( 2986 exp.Distinct, 2987 on=self._parse_value() if self._match(TokenType.ON) else None, 2988 ) 2989 2990 if all_ and distinct: 2991 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2992 2993 operation_modifiers = [] 2994 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2995 operation_modifiers.append(exp.var(self._prev.text.upper())) 2996 2997 limit = self._parse_limit(top=True) 2998 projections = self._parse_projections() 2999 3000 this = self.expression( 3001 exp.Select, 3002 kind=kind, 3003 hint=hint, 3004 distinct=distinct, 3005 expressions=projections, 3006 limit=limit, 3007 operation_modifiers=operation_modifiers or None, 3008 ) 3009 this.comments = comments 3010 3011 into = self._parse_into() 3012 if into: 3013 this.set("into", into) 3014 3015 if not from_: 3016 from_ = self._parse_from() 3017 3018 if from_: 3019 this.set("from", from_) 3020 3021 this = self._parse_query_modifiers(this) 3022 elif (table or nested) and self._match(TokenType.L_PAREN): 3023 if self._match(TokenType.PIVOT): 3024 this = self._parse_simplified_pivot() 3025 elif self._match(TokenType.FROM): 3026 this = exp.select("*").from_( 3027 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3028 ) 3029 else: 3030 this = ( 3031 self._parse_table() 3032 if table 3033 else self._parse_select(nested=True, parse_set_operation=False) 3034 ) 3035 3036 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3037 # in case a modifier (e.g. join) is following 3038 if table and isinstance(this, exp.Values) and this.alias: 3039 alias = this.args["alias"].pop() 3040 this = exp.Table(this=this, alias=alias) 3041 3042 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3043 3044 self._match_r_paren() 3045 3046 # We return early here so that the UNION isn't attached to the subquery by the 3047 # following call to _parse_set_operations, but instead becomes the parent node 3048 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3049 elif self._match(TokenType.VALUES, advance=False): 3050 this = self._parse_derived_table_values() 3051 elif from_: 3052 this = exp.select("*").from_(from_.this, copy=False) 3053 elif self._match(TokenType.SUMMARIZE): 3054 table = self._match(TokenType.TABLE) 3055 this = self._parse_select() or self._parse_string() or self._parse_table() 3056 return self.expression(exp.Summarize, this=this, table=table) 3057 elif self._match(TokenType.DESCRIBE): 3058 this = self._parse_describe() 3059 elif self._match_text_seq("STREAM"): 3060 this = self.expression(exp.Stream, this=self._parse_function()) 3061 else: 3062 this = None 3063 3064 return self._parse_set_operations(this) if parse_set_operation else this 3065 3066 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3067 if not skip_with_token and not self._match(TokenType.WITH): 3068 return None 3069 3070 comments = self._prev_comments 3071 recursive = self._match(TokenType.RECURSIVE) 3072 3073 last_comments = None 3074 expressions = [] 3075 while True: 3076 expressions.append(self._parse_cte()) 3077 if last_comments: 3078 expressions[-1].add_comments(last_comments) 3079 3080 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3081 break 3082 else: 3083 self._match(TokenType.WITH) 3084 3085 last_comments = self._prev_comments 3086 3087 return self.expression( 3088 exp.With, comments=comments, expressions=expressions, recursive=recursive 3089 ) 3090 3091 def _parse_cte(self) -> exp.CTE: 3092 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3093 if not alias or not alias.this: 3094 self.raise_error("Expected CTE to have alias") 3095 3096 self._match(TokenType.ALIAS) 3097 comments = self._prev_comments 3098 3099 if self._match_text_seq("NOT", "MATERIALIZED"): 3100 materialized = False 3101 elif self._match_text_seq("MATERIALIZED"): 3102 materialized = True 3103 else: 3104 materialized = None 3105 3106 return self.expression( 3107 exp.CTE, 3108 this=self._parse_wrapped(self._parse_statement), 3109 alias=alias, 3110 materialized=materialized, 3111 comments=comments, 3112 ) 3113 3114 def _parse_table_alias( 3115 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3116 ) -> t.Optional[exp.TableAlias]: 3117 any_token = self._match(TokenType.ALIAS) 3118 alias = ( 3119 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3120 or self._parse_string_as_identifier() 3121 ) 3122 3123 index = self._index 3124 if self._match(TokenType.L_PAREN): 3125 columns = self._parse_csv(self._parse_function_parameter) 3126 self._match_r_paren() if columns else self._retreat(index) 3127 else: 3128 columns = None 3129 3130 if not alias and not columns: 3131 return None 3132 3133 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3134 3135 # We bubble up comments from the Identifier to the TableAlias 3136 if isinstance(alias, exp.Identifier): 3137 table_alias.add_comments(alias.pop_comments()) 3138 3139 return table_alias 3140 3141 def _parse_subquery( 3142 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3143 ) -> t.Optional[exp.Subquery]: 3144 if not this: 3145 return None 3146 3147 return self.expression( 3148 exp.Subquery, 3149 this=this, 3150 pivots=self._parse_pivots(), 3151 alias=self._parse_table_alias() if parse_alias else None, 3152 sample=self._parse_table_sample(), 3153 ) 3154 3155 def _implicit_unnests_to_explicit(self, this: E) -> E: 3156 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3157 3158 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3159 for i, join in enumerate(this.args.get("joins") or []): 3160 table = join.this 3161 normalized_table = table.copy() 3162 normalized_table.meta["maybe_column"] = True 3163 normalized_table = _norm(normalized_table, dialect=self.dialect) 3164 3165 if isinstance(table, exp.Table) and not join.args.get("on"): 3166 if normalized_table.parts[0].name in refs: 3167 table_as_column = table.to_column() 3168 unnest = exp.Unnest(expressions=[table_as_column]) 3169 3170 # Table.to_column creates a parent Alias node that we want to convert to 3171 # a TableAlias and attach to the Unnest, so it matches the parser's output 3172 if isinstance(table.args.get("alias"), exp.TableAlias): 3173 table_as_column.replace(table_as_column.this) 3174 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3175 3176 table.replace(unnest) 3177 3178 refs.add(normalized_table.alias_or_name) 3179 3180 return this 3181 3182 def _parse_query_modifiers( 3183 self, this: t.Optional[exp.Expression] 3184 ) -> t.Optional[exp.Expression]: 3185 if isinstance(this, (exp.Query, exp.Table)): 3186 for join in self._parse_joins(): 3187 this.append("joins", join) 3188 for lateral in iter(self._parse_lateral, None): 3189 this.append("laterals", lateral) 3190 3191 while True: 3192 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3193 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3194 key, expression = parser(self) 3195 3196 if expression: 3197 this.set(key, expression) 3198 if key == "limit": 3199 offset = expression.args.pop("offset", None) 3200 3201 if offset: 3202 offset = exp.Offset(expression=offset) 3203 this.set("offset", offset) 3204 3205 limit_by_expressions = expression.expressions 3206 expression.set("expressions", None) 3207 offset.set("expressions", limit_by_expressions) 3208 continue 3209 break 3210 3211 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3212 this = self._implicit_unnests_to_explicit(this) 3213 3214 return this 3215 3216 def _parse_hint(self) -> t.Optional[exp.Hint]: 3217 if self._match(TokenType.HINT): 3218 hints = [] 3219 for hint in iter( 3220 lambda: self._parse_csv( 3221 lambda: self._parse_function() or self._parse_var(upper=True) 3222 ), 3223 [], 3224 ): 3225 hints.extend(hint) 3226 3227 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3228 self.raise_error("Expected */ after HINT") 3229 3230 return self.expression(exp.Hint, expressions=hints) 3231 3232 return None 3233 3234 def _parse_into(self) -> t.Optional[exp.Into]: 3235 if not self._match(TokenType.INTO): 3236 return None 3237 3238 temp = self._match(TokenType.TEMPORARY) 3239 unlogged = self._match_text_seq("UNLOGGED") 3240 self._match(TokenType.TABLE) 3241 3242 return self.expression( 3243 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3244 ) 3245 3246 def _parse_from( 3247 self, joins: bool = False, skip_from_token: bool = False 3248 ) -> t.Optional[exp.From]: 3249 if not skip_from_token and not self._match(TokenType.FROM): 3250 return None 3251 3252 return self.expression( 3253 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3254 ) 3255 3256 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3257 return self.expression( 3258 exp.MatchRecognizeMeasure, 3259 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3260 this=self._parse_expression(), 3261 ) 3262 3263 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3264 if not self._match(TokenType.MATCH_RECOGNIZE): 3265 return None 3266 3267 self._match_l_paren() 3268 3269 partition = self._parse_partition_by() 3270 order = self._parse_order() 3271 3272 measures = ( 3273 self._parse_csv(self._parse_match_recognize_measure) 3274 if self._match_text_seq("MEASURES") 3275 else None 3276 ) 3277 3278 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3279 rows = exp.var("ONE ROW PER MATCH") 3280 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3281 text = "ALL ROWS PER MATCH" 3282 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3283 text += " SHOW EMPTY MATCHES" 3284 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3285 text += " OMIT EMPTY MATCHES" 3286 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3287 text += " WITH UNMATCHED ROWS" 3288 rows = exp.var(text) 3289 else: 3290 rows = None 3291 3292 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3293 text = "AFTER MATCH SKIP" 3294 if self._match_text_seq("PAST", "LAST", "ROW"): 3295 text += " PAST LAST ROW" 3296 elif self._match_text_seq("TO", "NEXT", "ROW"): 3297 text += " TO NEXT ROW" 3298 elif self._match_text_seq("TO", "FIRST"): 3299 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3300 elif self._match_text_seq("TO", "LAST"): 3301 text += f" TO LAST {self._advance_any().text}" # type: ignore 3302 after = exp.var(text) 3303 else: 3304 after = None 3305 3306 if self._match_text_seq("PATTERN"): 3307 self._match_l_paren() 3308 3309 if not self._curr: 3310 self.raise_error("Expecting )", self._curr) 3311 3312 paren = 1 3313 start = self._curr 3314 3315 while self._curr and paren > 0: 3316 if self._curr.token_type == TokenType.L_PAREN: 3317 paren += 1 3318 if self._curr.token_type == TokenType.R_PAREN: 3319 paren -= 1 3320 3321 end = self._prev 3322 self._advance() 3323 3324 if paren > 0: 3325 self.raise_error("Expecting )", self._curr) 3326 3327 pattern = exp.var(self._find_sql(start, end)) 3328 else: 3329 pattern = None 3330 3331 define = ( 3332 self._parse_csv(self._parse_name_as_expression) 3333 if self._match_text_seq("DEFINE") 3334 else None 3335 ) 3336 3337 self._match_r_paren() 3338 3339 return self.expression( 3340 exp.MatchRecognize, 3341 partition_by=partition, 3342 order=order, 3343 measures=measures, 3344 rows=rows, 3345 after=after, 3346 pattern=pattern, 3347 define=define, 3348 alias=self._parse_table_alias(), 3349 ) 3350 3351 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3352 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3353 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3354 cross_apply = False 3355 3356 if cross_apply is not None: 3357 this = self._parse_select(table=True) 3358 view = None 3359 outer = None 3360 elif self._match(TokenType.LATERAL): 3361 this = self._parse_select(table=True) 3362 view = self._match(TokenType.VIEW) 3363 outer = self._match(TokenType.OUTER) 3364 else: 3365 return None 3366 3367 if not this: 3368 this = ( 3369 self._parse_unnest() 3370 or self._parse_function() 3371 or self._parse_id_var(any_token=False) 3372 ) 3373 3374 while self._match(TokenType.DOT): 3375 this = exp.Dot( 3376 this=this, 3377 expression=self._parse_function() or self._parse_id_var(any_token=False), 3378 ) 3379 3380 if view: 3381 table = self._parse_id_var(any_token=False) 3382 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3383 table_alias: t.Optional[exp.TableAlias] = self.expression( 3384 exp.TableAlias, this=table, columns=columns 3385 ) 3386 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3387 # We move the alias from the lateral's child node to the lateral itself 3388 table_alias = this.args["alias"].pop() 3389 else: 3390 table_alias = self._parse_table_alias() 3391 3392 return self.expression( 3393 exp.Lateral, 3394 this=this, 3395 view=view, 3396 outer=outer, 3397 alias=table_alias, 3398 cross_apply=cross_apply, 3399 ) 3400 3401 def _parse_join_parts( 3402 self, 3403 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3404 return ( 3405 self._match_set(self.JOIN_METHODS) and self._prev, 3406 self._match_set(self.JOIN_SIDES) and self._prev, 3407 self._match_set(self.JOIN_KINDS) and self._prev, 3408 ) 3409 3410 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3411 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3412 this = self._parse_column() 3413 if isinstance(this, exp.Column): 3414 return this.this 3415 return this 3416 3417 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3418 3419 def _parse_join( 3420 self, skip_join_token: bool = False, parse_bracket: bool = False 3421 ) -> t.Optional[exp.Join]: 3422 if self._match(TokenType.COMMA): 3423 return self.expression(exp.Join, this=self._parse_table()) 3424 3425 index = self._index 3426 method, side, kind = self._parse_join_parts() 3427 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3428 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3429 3430 if not skip_join_token and not join: 3431 self._retreat(index) 3432 kind = None 3433 method = None 3434 side = None 3435 3436 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3437 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3438 3439 if not skip_join_token and not join and not outer_apply and not cross_apply: 3440 return None 3441 3442 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3443 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3444 kwargs["expressions"] = self._parse_csv( 3445 lambda: self._parse_table(parse_bracket=parse_bracket) 3446 ) 3447 3448 if method: 3449 kwargs["method"] = method.text 3450 if side: 3451 kwargs["side"] = side.text 3452 if kind: 3453 kwargs["kind"] = kind.text 3454 if hint: 3455 kwargs["hint"] = hint 3456 3457 if self._match(TokenType.MATCH_CONDITION): 3458 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3459 3460 if self._match(TokenType.ON): 3461 kwargs["on"] = self._parse_assignment() 3462 elif self._match(TokenType.USING): 3463 kwargs["using"] = self._parse_using_identifiers() 3464 elif ( 3465 not (outer_apply or cross_apply) 3466 and not isinstance(kwargs["this"], exp.Unnest) 3467 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3468 ): 3469 index = self._index 3470 joins: t.Optional[list] = list(self._parse_joins()) 3471 3472 if joins and self._match(TokenType.ON): 3473 kwargs["on"] = self._parse_assignment() 3474 elif joins and self._match(TokenType.USING): 3475 kwargs["using"] = self._parse_using_identifiers() 3476 else: 3477 joins = None 3478 self._retreat(index) 3479 3480 kwargs["this"].set("joins", joins if joins else None) 3481 3482 comments = [c for token in (method, side, kind) if token for c in token.comments] 3483 return self.expression(exp.Join, comments=comments, **kwargs) 3484 3485 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3486 this = self._parse_assignment() 3487 3488 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3489 return this 3490 3491 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3492 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3493 3494 return this 3495 3496 def _parse_index_params(self) -> exp.IndexParameters: 3497 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3498 3499 if self._match(TokenType.L_PAREN, advance=False): 3500 columns = self._parse_wrapped_csv(self._parse_with_operator) 3501 else: 3502 columns = None 3503 3504 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3505 partition_by = self._parse_partition_by() 3506 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3507 tablespace = ( 3508 self._parse_var(any_token=True) 3509 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3510 else None 3511 ) 3512 where = self._parse_where() 3513 3514 on = self._parse_field() if self._match(TokenType.ON) else None 3515 3516 return self.expression( 3517 exp.IndexParameters, 3518 using=using, 3519 columns=columns, 3520 include=include, 3521 partition_by=partition_by, 3522 where=where, 3523 with_storage=with_storage, 3524 tablespace=tablespace, 3525 on=on, 3526 ) 3527 3528 def _parse_index( 3529 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3530 ) -> t.Optional[exp.Index]: 3531 if index or anonymous: 3532 unique = None 3533 primary = None 3534 amp = None 3535 3536 self._match(TokenType.ON) 3537 self._match(TokenType.TABLE) # hive 3538 table = self._parse_table_parts(schema=True) 3539 else: 3540 unique = self._match(TokenType.UNIQUE) 3541 primary = self._match_text_seq("PRIMARY") 3542 amp = self._match_text_seq("AMP") 3543 3544 if not self._match(TokenType.INDEX): 3545 return None 3546 3547 index = self._parse_id_var() 3548 table = None 3549 3550 params = self._parse_index_params() 3551 3552 return self.expression( 3553 exp.Index, 3554 this=index, 3555 table=table, 3556 unique=unique, 3557 primary=primary, 3558 amp=amp, 3559 params=params, 3560 ) 3561 3562 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3563 hints: t.List[exp.Expression] = [] 3564 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3565 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3566 hints.append( 3567 self.expression( 3568 exp.WithTableHint, 3569 expressions=self._parse_csv( 3570 lambda: self._parse_function() or self._parse_var(any_token=True) 3571 ), 3572 ) 3573 ) 3574 self._match_r_paren() 3575 else: 3576 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3577 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3578 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3579 3580 self._match_set((TokenType.INDEX, TokenType.KEY)) 3581 if self._match(TokenType.FOR): 3582 hint.set("target", self._advance_any() and self._prev.text.upper()) 3583 3584 hint.set("expressions", self._parse_wrapped_id_vars()) 3585 hints.append(hint) 3586 3587 return hints or None 3588 3589 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3590 return ( 3591 (not schema and self._parse_function(optional_parens=False)) 3592 or self._parse_id_var(any_token=False) 3593 or self._parse_string_as_identifier() 3594 or self._parse_placeholder() 3595 ) 3596 3597 def _parse_table_parts( 3598 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3599 ) -> exp.Table: 3600 catalog = None 3601 db = None 3602 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3603 3604 while self._match(TokenType.DOT): 3605 if catalog: 3606 # This allows nesting the table in arbitrarily many dot expressions if needed 3607 table = self.expression( 3608 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3609 ) 3610 else: 3611 catalog = db 3612 db = table 3613 # "" used for tsql FROM a..b case 3614 table = self._parse_table_part(schema=schema) or "" 3615 3616 if ( 3617 wildcard 3618 and self._is_connected() 3619 and (isinstance(table, exp.Identifier) or not table) 3620 and self._match(TokenType.STAR) 3621 ): 3622 if isinstance(table, exp.Identifier): 3623 table.args["this"] += "*" 3624 else: 3625 table = exp.Identifier(this="*") 3626 3627 # We bubble up comments from the Identifier to the Table 3628 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3629 3630 if is_db_reference: 3631 catalog = db 3632 db = table 3633 table = None 3634 3635 if not table and not is_db_reference: 3636 self.raise_error(f"Expected table name but got {self._curr}") 3637 if not db and is_db_reference: 3638 self.raise_error(f"Expected database name but got {self._curr}") 3639 3640 table = self.expression( 3641 exp.Table, 3642 comments=comments, 3643 this=table, 3644 db=db, 3645 catalog=catalog, 3646 ) 3647 3648 changes = self._parse_changes() 3649 if changes: 3650 table.set("changes", changes) 3651 3652 at_before = self._parse_historical_data() 3653 if at_before: 3654 table.set("when", at_before) 3655 3656 pivots = self._parse_pivots() 3657 if pivots: 3658 table.set("pivots", pivots) 3659 3660 return table 3661 3662 def _parse_table( 3663 self, 3664 schema: bool = False, 3665 joins: bool = False, 3666 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3667 parse_bracket: bool = False, 3668 is_db_reference: bool = False, 3669 parse_partition: bool = False, 3670 ) -> t.Optional[exp.Expression]: 3671 lateral = self._parse_lateral() 3672 if lateral: 3673 return lateral 3674 3675 unnest = self._parse_unnest() 3676 if unnest: 3677 return unnest 3678 3679 values = self._parse_derived_table_values() 3680 if values: 3681 return values 3682 3683 subquery = self._parse_select(table=True) 3684 if subquery: 3685 if not subquery.args.get("pivots"): 3686 subquery.set("pivots", self._parse_pivots()) 3687 return subquery 3688 3689 bracket = parse_bracket and self._parse_bracket(None) 3690 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3691 3692 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3693 self._parse_table 3694 ) 3695 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3696 3697 only = self._match(TokenType.ONLY) 3698 3699 this = t.cast( 3700 exp.Expression, 3701 bracket 3702 or rows_from 3703 or self._parse_bracket( 3704 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3705 ), 3706 ) 3707 3708 if only: 3709 this.set("only", only) 3710 3711 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3712 self._match_text_seq("*") 3713 3714 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3715 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3716 this.set("partition", self._parse_partition()) 3717 3718 if schema: 3719 return self._parse_schema(this=this) 3720 3721 version = self._parse_version() 3722 3723 if version: 3724 this.set("version", version) 3725 3726 if self.dialect.ALIAS_POST_TABLESAMPLE: 3727 this.set("sample", self._parse_table_sample()) 3728 3729 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3730 if alias: 3731 this.set("alias", alias) 3732 3733 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3734 return self.expression( 3735 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3736 ) 3737 3738 this.set("hints", self._parse_table_hints()) 3739 3740 if not this.args.get("pivots"): 3741 this.set("pivots", self._parse_pivots()) 3742 3743 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3744 this.set("sample", self._parse_table_sample()) 3745 3746 if joins: 3747 for join in self._parse_joins(): 3748 this.append("joins", join) 3749 3750 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3751 this.set("ordinality", True) 3752 this.set("alias", self._parse_table_alias()) 3753 3754 return this 3755 3756 def _parse_version(self) -> t.Optional[exp.Version]: 3757 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3758 this = "TIMESTAMP" 3759 elif self._match(TokenType.VERSION_SNAPSHOT): 3760 this = "VERSION" 3761 else: 3762 return None 3763 3764 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3765 kind = self._prev.text.upper() 3766 start = self._parse_bitwise() 3767 self._match_texts(("TO", "AND")) 3768 end = self._parse_bitwise() 3769 expression: t.Optional[exp.Expression] = self.expression( 3770 exp.Tuple, expressions=[start, end] 3771 ) 3772 elif self._match_text_seq("CONTAINED", "IN"): 3773 kind = "CONTAINED IN" 3774 expression = self.expression( 3775 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3776 ) 3777 elif self._match(TokenType.ALL): 3778 kind = "ALL" 3779 expression = None 3780 else: 3781 self._match_text_seq("AS", "OF") 3782 kind = "AS OF" 3783 expression = self._parse_type() 3784 3785 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3786 3787 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3788 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3789 index = self._index 3790 historical_data = None 3791 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3792 this = self._prev.text.upper() 3793 kind = ( 3794 self._match(TokenType.L_PAREN) 3795 and self._match_texts(self.HISTORICAL_DATA_KIND) 3796 and self._prev.text.upper() 3797 ) 3798 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3799 3800 if expression: 3801 self._match_r_paren() 3802 historical_data = self.expression( 3803 exp.HistoricalData, this=this, kind=kind, expression=expression 3804 ) 3805 else: 3806 self._retreat(index) 3807 3808 return historical_data 3809 3810 def _parse_changes(self) -> t.Optional[exp.Changes]: 3811 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3812 return None 3813 3814 information = self._parse_var(any_token=True) 3815 self._match_r_paren() 3816 3817 return self.expression( 3818 exp.Changes, 3819 information=information, 3820 at_before=self._parse_historical_data(), 3821 end=self._parse_historical_data(), 3822 ) 3823 3824 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3825 if not self._match(TokenType.UNNEST): 3826 return None 3827 3828 expressions = self._parse_wrapped_csv(self._parse_equality) 3829 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3830 3831 alias = self._parse_table_alias() if with_alias else None 3832 3833 if alias: 3834 if self.dialect.UNNEST_COLUMN_ONLY: 3835 if alias.args.get("columns"): 3836 self.raise_error("Unexpected extra column alias in unnest.") 3837 3838 alias.set("columns", [alias.this]) 3839 alias.set("this", None) 3840 3841 columns = alias.args.get("columns") or [] 3842 if offset and len(expressions) < len(columns): 3843 offset = columns.pop() 3844 3845 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3846 self._match(TokenType.ALIAS) 3847 offset = self._parse_id_var( 3848 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3849 ) or exp.to_identifier("offset") 3850 3851 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3852 3853 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3854 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3855 if not is_derived and not ( 3856 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3857 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3858 ): 3859 return None 3860 3861 expressions = self._parse_csv(self._parse_value) 3862 alias = self._parse_table_alias() 3863 3864 if is_derived: 3865 self._match_r_paren() 3866 3867 return self.expression( 3868 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3869 ) 3870 3871 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3872 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3873 as_modifier and self._match_text_seq("USING", "SAMPLE") 3874 ): 3875 return None 3876 3877 bucket_numerator = None 3878 bucket_denominator = None 3879 bucket_field = None 3880 percent = None 3881 size = None 3882 seed = None 3883 3884 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3885 matched_l_paren = self._match(TokenType.L_PAREN) 3886 3887 if self.TABLESAMPLE_CSV: 3888 num = None 3889 expressions = self._parse_csv(self._parse_primary) 3890 else: 3891 expressions = None 3892 num = ( 3893 self._parse_factor() 3894 if self._match(TokenType.NUMBER, advance=False) 3895 else self._parse_primary() or self._parse_placeholder() 3896 ) 3897 3898 if self._match_text_seq("BUCKET"): 3899 bucket_numerator = self._parse_number() 3900 self._match_text_seq("OUT", "OF") 3901 bucket_denominator = bucket_denominator = self._parse_number() 3902 self._match(TokenType.ON) 3903 bucket_field = self._parse_field() 3904 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3905 percent = num 3906 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3907 size = num 3908 else: 3909 percent = num 3910 3911 if matched_l_paren: 3912 self._match_r_paren() 3913 3914 if self._match(TokenType.L_PAREN): 3915 method = self._parse_var(upper=True) 3916 seed = self._match(TokenType.COMMA) and self._parse_number() 3917 self._match_r_paren() 3918 elif self._match_texts(("SEED", "REPEATABLE")): 3919 seed = self._parse_wrapped(self._parse_number) 3920 3921 if not method and self.DEFAULT_SAMPLING_METHOD: 3922 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3923 3924 return self.expression( 3925 exp.TableSample, 3926 expressions=expressions, 3927 method=method, 3928 bucket_numerator=bucket_numerator, 3929 bucket_denominator=bucket_denominator, 3930 bucket_field=bucket_field, 3931 percent=percent, 3932 size=size, 3933 seed=seed, 3934 ) 3935 3936 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3937 return list(iter(self._parse_pivot, None)) or None 3938 3939 def _parse_joins(self) -> t.Iterator[exp.Join]: 3940 return iter(self._parse_join, None) 3941 3942 # https://duckdb.org/docs/sql/statements/pivot 3943 def _parse_simplified_pivot(self) -> exp.Pivot: 3944 def _parse_on() -> t.Optional[exp.Expression]: 3945 this = self._parse_bitwise() 3946 return self._parse_in(this) if self._match(TokenType.IN) else this 3947 3948 this = self._parse_table() 3949 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3950 using = self._match(TokenType.USING) and self._parse_csv( 3951 lambda: self._parse_alias(self._parse_function()) 3952 ) 3953 group = self._parse_group() 3954 return self.expression( 3955 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3956 ) 3957 3958 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3959 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3960 this = self._parse_select_or_expression() 3961 3962 self._match(TokenType.ALIAS) 3963 alias = self._parse_bitwise() 3964 if alias: 3965 if isinstance(alias, exp.Column) and not alias.db: 3966 alias = alias.this 3967 return self.expression(exp.PivotAlias, this=this, alias=alias) 3968 3969 return this 3970 3971 value = self._parse_column() 3972 3973 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3974 self.raise_error("Expecting IN (") 3975 3976 if self._match(TokenType.ANY): 3977 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3978 else: 3979 exprs = self._parse_csv(_parse_aliased_expression) 3980 3981 self._match_r_paren() 3982 return self.expression(exp.In, this=value, expressions=exprs) 3983 3984 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3985 index = self._index 3986 include_nulls = None 3987 3988 if self._match(TokenType.PIVOT): 3989 unpivot = False 3990 elif self._match(TokenType.UNPIVOT): 3991 unpivot = True 3992 3993 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3994 if self._match_text_seq("INCLUDE", "NULLS"): 3995 include_nulls = True 3996 elif self._match_text_seq("EXCLUDE", "NULLS"): 3997 include_nulls = False 3998 else: 3999 return None 4000 4001 expressions = [] 4002 4003 if not self._match(TokenType.L_PAREN): 4004 self._retreat(index) 4005 return None 4006 4007 if unpivot: 4008 expressions = self._parse_csv(self._parse_column) 4009 else: 4010 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4011 4012 if not expressions: 4013 self.raise_error("Failed to parse PIVOT's aggregation list") 4014 4015 if not self._match(TokenType.FOR): 4016 self.raise_error("Expecting FOR") 4017 4018 field = self._parse_pivot_in() 4019 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4020 self._parse_bitwise 4021 ) 4022 4023 self._match_r_paren() 4024 4025 pivot = self.expression( 4026 exp.Pivot, 4027 expressions=expressions, 4028 field=field, 4029 unpivot=unpivot, 4030 include_nulls=include_nulls, 4031 default_on_null=default_on_null, 4032 ) 4033 4034 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4035 pivot.set("alias", self._parse_table_alias()) 4036 4037 if not unpivot: 4038 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4039 4040 columns: t.List[exp.Expression] = [] 4041 for fld in pivot.args["field"].expressions: 4042 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4043 for name in names: 4044 if self.PREFIXED_PIVOT_COLUMNS: 4045 name = f"{name}_{field_name}" if name else field_name 4046 else: 4047 name = f"{field_name}_{name}" if name else field_name 4048 4049 columns.append(exp.to_identifier(name)) 4050 4051 pivot.set("columns", columns) 4052 4053 return pivot 4054 4055 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4056 return [agg.alias for agg in aggregations] 4057 4058 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4059 if not skip_where_token and not self._match(TokenType.PREWHERE): 4060 return None 4061 4062 return self.expression( 4063 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4064 ) 4065 4066 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4067 if not skip_where_token and not self._match(TokenType.WHERE): 4068 return None 4069 4070 return self.expression( 4071 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4072 ) 4073 4074 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4075 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4076 return None 4077 4078 elements: t.Dict[str, t.Any] = defaultdict(list) 4079 4080 if self._match(TokenType.ALL): 4081 elements["all"] = True 4082 elif self._match(TokenType.DISTINCT): 4083 elements["all"] = False 4084 4085 while True: 4086 index = self._index 4087 4088 elements["expressions"].extend( 4089 self._parse_csv( 4090 lambda: None 4091 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4092 else self._parse_assignment() 4093 ) 4094 ) 4095 4096 before_with_index = self._index 4097 with_prefix = self._match(TokenType.WITH) 4098 4099 if self._match(TokenType.ROLLUP): 4100 elements["rollup"].append( 4101 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4102 ) 4103 elif self._match(TokenType.CUBE): 4104 elements["cube"].append( 4105 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4106 ) 4107 elif self._match(TokenType.GROUPING_SETS): 4108 elements["grouping_sets"].append( 4109 self.expression( 4110 exp.GroupingSets, 4111 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4112 ) 4113 ) 4114 elif self._match_text_seq("TOTALS"): 4115 elements["totals"] = True # type: ignore 4116 4117 if before_with_index <= self._index <= before_with_index + 1: 4118 self._retreat(before_with_index) 4119 break 4120 4121 if index == self._index: 4122 break 4123 4124 return self.expression(exp.Group, **elements) # type: ignore 4125 4126 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4127 return self.expression( 4128 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4129 ) 4130 4131 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4132 if self._match(TokenType.L_PAREN): 4133 grouping_set = self._parse_csv(self._parse_column) 4134 self._match_r_paren() 4135 return self.expression(exp.Tuple, expressions=grouping_set) 4136 4137 return self._parse_column() 4138 4139 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4140 if not skip_having_token and not self._match(TokenType.HAVING): 4141 return None 4142 return self.expression(exp.Having, this=self._parse_assignment()) 4143 4144 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4145 if not self._match(TokenType.QUALIFY): 4146 return None 4147 return self.expression(exp.Qualify, this=self._parse_assignment()) 4148 4149 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4150 if skip_start_token: 4151 start = None 4152 elif self._match(TokenType.START_WITH): 4153 start = self._parse_assignment() 4154 else: 4155 return None 4156 4157 self._match(TokenType.CONNECT_BY) 4158 nocycle = self._match_text_seq("NOCYCLE") 4159 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4160 exp.Prior, this=self._parse_bitwise() 4161 ) 4162 connect = self._parse_assignment() 4163 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4164 4165 if not start and self._match(TokenType.START_WITH): 4166 start = self._parse_assignment() 4167 4168 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4169 4170 def _parse_name_as_expression(self) -> exp.Alias: 4171 return self.expression( 4172 exp.Alias, 4173 alias=self._parse_id_var(any_token=True), 4174 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4175 ) 4176 4177 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4178 if self._match_text_seq("INTERPOLATE"): 4179 return self._parse_wrapped_csv(self._parse_name_as_expression) 4180 return None 4181 4182 def _parse_order( 4183 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4184 ) -> t.Optional[exp.Expression]: 4185 siblings = None 4186 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4187 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4188 return this 4189 4190 siblings = True 4191 4192 return self.expression( 4193 exp.Order, 4194 this=this, 4195 expressions=self._parse_csv(self._parse_ordered), 4196 siblings=siblings, 4197 ) 4198 4199 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4200 if not self._match(token): 4201 return None 4202 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4203 4204 def _parse_ordered( 4205 self, parse_method: t.Optional[t.Callable] = None 4206 ) -> t.Optional[exp.Ordered]: 4207 this = parse_method() if parse_method else self._parse_assignment() 4208 if not this: 4209 return None 4210 4211 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4212 this = exp.var("ALL") 4213 4214 asc = self._match(TokenType.ASC) 4215 desc = self._match(TokenType.DESC) or (asc and False) 4216 4217 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4218 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4219 4220 nulls_first = is_nulls_first or False 4221 explicitly_null_ordered = is_nulls_first or is_nulls_last 4222 4223 if ( 4224 not explicitly_null_ordered 4225 and ( 4226 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4227 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4228 ) 4229 and self.dialect.NULL_ORDERING != "nulls_are_last" 4230 ): 4231 nulls_first = True 4232 4233 if self._match_text_seq("WITH", "FILL"): 4234 with_fill = self.expression( 4235 exp.WithFill, 4236 **{ # type: ignore 4237 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4238 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4239 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4240 "interpolate": self._parse_interpolate(), 4241 }, 4242 ) 4243 else: 4244 with_fill = None 4245 4246 return self.expression( 4247 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4248 ) 4249 4250 def _parse_limit( 4251 self, 4252 this: t.Optional[exp.Expression] = None, 4253 top: bool = False, 4254 skip_limit_token: bool = False, 4255 ) -> t.Optional[exp.Expression]: 4256 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4257 comments = self._prev_comments 4258 if top: 4259 limit_paren = self._match(TokenType.L_PAREN) 4260 expression = self._parse_term() if limit_paren else self._parse_number() 4261 4262 if limit_paren: 4263 self._match_r_paren() 4264 else: 4265 expression = self._parse_term() 4266 4267 if self._match(TokenType.COMMA): 4268 offset = expression 4269 expression = self._parse_term() 4270 else: 4271 offset = None 4272 4273 limit_exp = self.expression( 4274 exp.Limit, 4275 this=this, 4276 expression=expression, 4277 offset=offset, 4278 comments=comments, 4279 expressions=self._parse_limit_by(), 4280 ) 4281 4282 return limit_exp 4283 4284 if self._match(TokenType.FETCH): 4285 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4286 direction = self._prev.text.upper() if direction else "FIRST" 4287 4288 count = self._parse_field(tokens=self.FETCH_TOKENS) 4289 percent = self._match(TokenType.PERCENT) 4290 4291 self._match_set((TokenType.ROW, TokenType.ROWS)) 4292 4293 only = self._match_text_seq("ONLY") 4294 with_ties = self._match_text_seq("WITH", "TIES") 4295 4296 if only and with_ties: 4297 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4298 4299 return self.expression( 4300 exp.Fetch, 4301 direction=direction, 4302 count=count, 4303 percent=percent, 4304 with_ties=with_ties, 4305 ) 4306 4307 return this 4308 4309 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4310 if not self._match(TokenType.OFFSET): 4311 return this 4312 4313 count = self._parse_term() 4314 self._match_set((TokenType.ROW, TokenType.ROWS)) 4315 4316 return self.expression( 4317 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4318 ) 4319 4320 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4321 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4322 4323 def _parse_locks(self) -> t.List[exp.Lock]: 4324 locks = [] 4325 while True: 4326 if self._match_text_seq("FOR", "UPDATE"): 4327 update = True 4328 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4329 "LOCK", "IN", "SHARE", "MODE" 4330 ): 4331 update = False 4332 else: 4333 break 4334 4335 expressions = None 4336 if self._match_text_seq("OF"): 4337 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4338 4339 wait: t.Optional[bool | exp.Expression] = None 4340 if self._match_text_seq("NOWAIT"): 4341 wait = True 4342 elif self._match_text_seq("WAIT"): 4343 wait = self._parse_primary() 4344 elif self._match_text_seq("SKIP", "LOCKED"): 4345 wait = False 4346 4347 locks.append( 4348 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4349 ) 4350 4351 return locks 4352 4353 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4354 while this and self._match_set(self.SET_OPERATIONS): 4355 token_type = self._prev.token_type 4356 4357 if token_type == TokenType.UNION: 4358 operation: t.Type[exp.SetOperation] = exp.Union 4359 elif token_type == TokenType.EXCEPT: 4360 operation = exp.Except 4361 else: 4362 operation = exp.Intersect 4363 4364 comments = self._prev.comments 4365 4366 if self._match(TokenType.DISTINCT): 4367 distinct: t.Optional[bool] = True 4368 elif self._match(TokenType.ALL): 4369 distinct = False 4370 else: 4371 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4372 if distinct is None: 4373 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4374 4375 by_name = self._match_text_seq("BY", "NAME") 4376 expression = self._parse_select(nested=True, parse_set_operation=False) 4377 4378 this = self.expression( 4379 operation, 4380 comments=comments, 4381 this=this, 4382 distinct=distinct, 4383 by_name=by_name, 4384 expression=expression, 4385 ) 4386 4387 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4388 expression = this.expression 4389 4390 if expression: 4391 for arg in self.SET_OP_MODIFIERS: 4392 expr = expression.args.get(arg) 4393 if expr: 4394 this.set(arg, expr.pop()) 4395 4396 return this 4397 4398 def _parse_expression(self) -> t.Optional[exp.Expression]: 4399 return self._parse_alias(self._parse_assignment()) 4400 4401 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4402 this = self._parse_disjunction() 4403 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4404 # This allows us to parse <non-identifier token> := <expr> 4405 this = exp.column( 4406 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4407 ) 4408 4409 while self._match_set(self.ASSIGNMENT): 4410 if isinstance(this, exp.Column) and len(this.parts) == 1: 4411 this = this.this 4412 4413 this = self.expression( 4414 self.ASSIGNMENT[self._prev.token_type], 4415 this=this, 4416 comments=self._prev_comments, 4417 expression=self._parse_assignment(), 4418 ) 4419 4420 return this 4421 4422 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4423 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4424 4425 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4426 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4427 4428 def _parse_equality(self) -> t.Optional[exp.Expression]: 4429 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4430 4431 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4432 return self._parse_tokens(self._parse_range, self.COMPARISON) 4433 4434 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4435 this = this or self._parse_bitwise() 4436 negate = self._match(TokenType.NOT) 4437 4438 if self._match_set(self.RANGE_PARSERS): 4439 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4440 if not expression: 4441 return this 4442 4443 this = expression 4444 elif self._match(TokenType.ISNULL): 4445 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4446 4447 # Postgres supports ISNULL and NOTNULL for conditions. 4448 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4449 if self._match(TokenType.NOTNULL): 4450 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4451 this = self.expression(exp.Not, this=this) 4452 4453 if negate: 4454 this = self._negate_range(this) 4455 4456 if self._match(TokenType.IS): 4457 this = self._parse_is(this) 4458 4459 return this 4460 4461 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4462 if not this: 4463 return this 4464 4465 return self.expression(exp.Not, this=this) 4466 4467 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4468 index = self._index - 1 4469 negate = self._match(TokenType.NOT) 4470 4471 if self._match_text_seq("DISTINCT", "FROM"): 4472 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4473 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4474 4475 if self._match(TokenType.JSON): 4476 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4477 4478 if self._match_text_seq("WITH"): 4479 _with = True 4480 elif self._match_text_seq("WITHOUT"): 4481 _with = False 4482 else: 4483 _with = None 4484 4485 unique = self._match(TokenType.UNIQUE) 4486 self._match_text_seq("KEYS") 4487 expression: t.Optional[exp.Expression] = self.expression( 4488 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4489 ) 4490 else: 4491 expression = self._parse_primary() or self._parse_null() 4492 if not expression: 4493 self._retreat(index) 4494 return None 4495 4496 this = self.expression(exp.Is, this=this, expression=expression) 4497 return self.expression(exp.Not, this=this) if negate else this 4498 4499 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4500 unnest = self._parse_unnest(with_alias=False) 4501 if unnest: 4502 this = self.expression(exp.In, this=this, unnest=unnest) 4503 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4504 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4505 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4506 4507 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4508 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4509 else: 4510 this = self.expression(exp.In, this=this, expressions=expressions) 4511 4512 if matched_l_paren: 4513 self._match_r_paren(this) 4514 elif not self._match(TokenType.R_BRACKET, expression=this): 4515 self.raise_error("Expecting ]") 4516 else: 4517 this = self.expression(exp.In, this=this, field=self._parse_column()) 4518 4519 return this 4520 4521 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4522 low = self._parse_bitwise() 4523 self._match(TokenType.AND) 4524 high = self._parse_bitwise() 4525 return self.expression(exp.Between, this=this, low=low, high=high) 4526 4527 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4528 if not self._match(TokenType.ESCAPE): 4529 return this 4530 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4531 4532 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4533 index = self._index 4534 4535 if not self._match(TokenType.INTERVAL) and match_interval: 4536 return None 4537 4538 if self._match(TokenType.STRING, advance=False): 4539 this = self._parse_primary() 4540 else: 4541 this = self._parse_term() 4542 4543 if not this or ( 4544 isinstance(this, exp.Column) 4545 and not this.table 4546 and not this.this.quoted 4547 and this.name.upper() == "IS" 4548 ): 4549 self._retreat(index) 4550 return None 4551 4552 unit = self._parse_function() or ( 4553 not self._match(TokenType.ALIAS, advance=False) 4554 and self._parse_var(any_token=True, upper=True) 4555 ) 4556 4557 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4558 # each INTERVAL expression into this canonical form so it's easy to transpile 4559 if this and this.is_number: 4560 this = exp.Literal.string(this.to_py()) 4561 elif this and this.is_string: 4562 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4563 if len(parts) == 1: 4564 if unit: 4565 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4566 self._retreat(self._index - 1) 4567 4568 this = exp.Literal.string(parts[0][0]) 4569 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4570 4571 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4572 unit = self.expression( 4573 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4574 ) 4575 4576 interval = self.expression(exp.Interval, this=this, unit=unit) 4577 4578 index = self._index 4579 self._match(TokenType.PLUS) 4580 4581 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4582 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4583 return self.expression( 4584 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4585 ) 4586 4587 self._retreat(index) 4588 return interval 4589 4590 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4591 this = self._parse_term() 4592 4593 while True: 4594 if self._match_set(self.BITWISE): 4595 this = self.expression( 4596 self.BITWISE[self._prev.token_type], 4597 this=this, 4598 expression=self._parse_term(), 4599 ) 4600 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4601 this = self.expression( 4602 exp.DPipe, 4603 this=this, 4604 expression=self._parse_term(), 4605 safe=not self.dialect.STRICT_STRING_CONCAT, 4606 ) 4607 elif self._match(TokenType.DQMARK): 4608 this = self.expression( 4609 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4610 ) 4611 elif self._match_pair(TokenType.LT, TokenType.LT): 4612 this = self.expression( 4613 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4614 ) 4615 elif self._match_pair(TokenType.GT, TokenType.GT): 4616 this = self.expression( 4617 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4618 ) 4619 else: 4620 break 4621 4622 return this 4623 4624 def _parse_term(self) -> t.Optional[exp.Expression]: 4625 this = self._parse_factor() 4626 4627 while self._match_set(self.TERM): 4628 klass = self.TERM[self._prev.token_type] 4629 comments = self._prev_comments 4630 expression = self._parse_factor() 4631 4632 this = self.expression(klass, this=this, comments=comments, expression=expression) 4633 4634 if isinstance(this, exp.Collate): 4635 expr = this.expression 4636 4637 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4638 # fallback to Identifier / Var 4639 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4640 ident = expr.this 4641 if isinstance(ident, exp.Identifier): 4642 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4643 4644 return this 4645 4646 def _parse_factor(self) -> t.Optional[exp.Expression]: 4647 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4648 this = parse_method() 4649 4650 while self._match_set(self.FACTOR): 4651 klass = self.FACTOR[self._prev.token_type] 4652 comments = self._prev_comments 4653 expression = parse_method() 4654 4655 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4656 self._retreat(self._index - 1) 4657 return this 4658 4659 this = self.expression(klass, this=this, comments=comments, expression=expression) 4660 4661 if isinstance(this, exp.Div): 4662 this.args["typed"] = self.dialect.TYPED_DIVISION 4663 this.args["safe"] = self.dialect.SAFE_DIVISION 4664 4665 return this 4666 4667 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4668 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4669 4670 def _parse_unary(self) -> t.Optional[exp.Expression]: 4671 if self._match_set(self.UNARY_PARSERS): 4672 return self.UNARY_PARSERS[self._prev.token_type](self) 4673 return self._parse_at_time_zone(self._parse_type()) 4674 4675 def _parse_type( 4676 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4677 ) -> t.Optional[exp.Expression]: 4678 interval = parse_interval and self._parse_interval() 4679 if interval: 4680 return interval 4681 4682 index = self._index 4683 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4684 4685 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4686 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4687 if isinstance(data_type, exp.Cast): 4688 # This constructor can contain ops directly after it, for instance struct unnesting: 4689 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4690 return self._parse_column_ops(data_type) 4691 4692 if data_type: 4693 index2 = self._index 4694 this = self._parse_primary() 4695 4696 if isinstance(this, exp.Literal): 4697 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4698 if parser: 4699 return parser(self, this, data_type) 4700 4701 return self.expression(exp.Cast, this=this, to=data_type) 4702 4703 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4704 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4705 # 4706 # If the index difference here is greater than 1, that means the parser itself must have 4707 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4708 # 4709 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4710 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4711 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4712 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4713 # 4714 # In these cases, we don't really want to return the converted type, but instead retreat 4715 # and try to parse a Column or Identifier in the section below. 4716 if data_type.expressions and index2 - index > 1: 4717 self._retreat(index2) 4718 return self._parse_column_ops(data_type) 4719 4720 self._retreat(index) 4721 4722 if fallback_to_identifier: 4723 return self._parse_id_var() 4724 4725 this = self._parse_column() 4726 return this and self._parse_column_ops(this) 4727 4728 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4729 this = self._parse_type() 4730 if not this: 4731 return None 4732 4733 if isinstance(this, exp.Column) and not this.table: 4734 this = exp.var(this.name.upper()) 4735 4736 return self.expression( 4737 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4738 ) 4739 4740 def _parse_types( 4741 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4742 ) -> t.Optional[exp.Expression]: 4743 index = self._index 4744 4745 this: t.Optional[exp.Expression] = None 4746 prefix = self._match_text_seq("SYSUDTLIB", ".") 4747 4748 if not self._match_set(self.TYPE_TOKENS): 4749 identifier = allow_identifiers and self._parse_id_var( 4750 any_token=False, tokens=(TokenType.VAR,) 4751 ) 4752 if isinstance(identifier, exp.Identifier): 4753 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4754 4755 if len(tokens) != 1: 4756 self.raise_error("Unexpected identifier", self._prev) 4757 4758 if tokens[0].token_type in self.TYPE_TOKENS: 4759 self._prev = tokens[0] 4760 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4761 type_name = identifier.name 4762 4763 while self._match(TokenType.DOT): 4764 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4765 4766 this = exp.DataType.build(type_name, udt=True) 4767 else: 4768 self._retreat(self._index - 1) 4769 return None 4770 else: 4771 return None 4772 4773 type_token = self._prev.token_type 4774 4775 if type_token == TokenType.PSEUDO_TYPE: 4776 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4777 4778 if type_token == TokenType.OBJECT_IDENTIFIER: 4779 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4780 4781 # https://materialize.com/docs/sql/types/map/ 4782 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4783 key_type = self._parse_types( 4784 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4785 ) 4786 if not self._match(TokenType.FARROW): 4787 self._retreat(index) 4788 return None 4789 4790 value_type = self._parse_types( 4791 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4792 ) 4793 if not self._match(TokenType.R_BRACKET): 4794 self._retreat(index) 4795 return None 4796 4797 return exp.DataType( 4798 this=exp.DataType.Type.MAP, 4799 expressions=[key_type, value_type], 4800 nested=True, 4801 prefix=prefix, 4802 ) 4803 4804 nested = type_token in self.NESTED_TYPE_TOKENS 4805 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4806 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4807 expressions = None 4808 maybe_func = False 4809 4810 if self._match(TokenType.L_PAREN): 4811 if is_struct: 4812 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4813 elif nested: 4814 expressions = self._parse_csv( 4815 lambda: self._parse_types( 4816 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4817 ) 4818 ) 4819 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4820 this = expressions[0] 4821 this.set("nullable", True) 4822 self._match_r_paren() 4823 return this 4824 elif type_token in self.ENUM_TYPE_TOKENS: 4825 expressions = self._parse_csv(self._parse_equality) 4826 elif is_aggregate: 4827 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4828 any_token=False, tokens=(TokenType.VAR,) 4829 ) 4830 if not func_or_ident or not self._match(TokenType.COMMA): 4831 return None 4832 expressions = self._parse_csv( 4833 lambda: self._parse_types( 4834 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4835 ) 4836 ) 4837 expressions.insert(0, func_or_ident) 4838 else: 4839 expressions = self._parse_csv(self._parse_type_size) 4840 4841 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4842 if type_token == TokenType.VECTOR and len(expressions) == 2: 4843 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4844 4845 if not expressions or not self._match(TokenType.R_PAREN): 4846 self._retreat(index) 4847 return None 4848 4849 maybe_func = True 4850 4851 values: t.Optional[t.List[exp.Expression]] = None 4852 4853 if nested and self._match(TokenType.LT): 4854 if is_struct: 4855 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4856 else: 4857 expressions = self._parse_csv( 4858 lambda: self._parse_types( 4859 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4860 ) 4861 ) 4862 4863 if not self._match(TokenType.GT): 4864 self.raise_error("Expecting >") 4865 4866 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4867 values = self._parse_csv(self._parse_assignment) 4868 if not values and is_struct: 4869 values = None 4870 self._retreat(self._index - 1) 4871 else: 4872 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4873 4874 if type_token in self.TIMESTAMPS: 4875 if self._match_text_seq("WITH", "TIME", "ZONE"): 4876 maybe_func = False 4877 tz_type = ( 4878 exp.DataType.Type.TIMETZ 4879 if type_token in self.TIMES 4880 else exp.DataType.Type.TIMESTAMPTZ 4881 ) 4882 this = exp.DataType(this=tz_type, expressions=expressions) 4883 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4884 maybe_func = False 4885 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4886 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4887 maybe_func = False 4888 elif type_token == TokenType.INTERVAL: 4889 unit = self._parse_var(upper=True) 4890 if unit: 4891 if self._match_text_seq("TO"): 4892 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4893 4894 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4895 else: 4896 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4897 4898 if maybe_func and check_func: 4899 index2 = self._index 4900 peek = self._parse_string() 4901 4902 if not peek: 4903 self._retreat(index) 4904 return None 4905 4906 self._retreat(index2) 4907 4908 if not this: 4909 if self._match_text_seq("UNSIGNED"): 4910 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4911 if not unsigned_type_token: 4912 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4913 4914 type_token = unsigned_type_token or type_token 4915 4916 this = exp.DataType( 4917 this=exp.DataType.Type[type_token.value], 4918 expressions=expressions, 4919 nested=nested, 4920 prefix=prefix, 4921 ) 4922 4923 # Empty arrays/structs are allowed 4924 if values is not None: 4925 cls = exp.Struct if is_struct else exp.Array 4926 this = exp.cast(cls(expressions=values), this, copy=False) 4927 4928 elif expressions: 4929 this.set("expressions", expressions) 4930 4931 # https://materialize.com/docs/sql/types/list/#type-name 4932 while self._match(TokenType.LIST): 4933 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4934 4935 index = self._index 4936 4937 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4938 matched_array = self._match(TokenType.ARRAY) 4939 4940 while self._curr: 4941 datatype_token = self._prev.token_type 4942 matched_l_bracket = self._match(TokenType.L_BRACKET) 4943 if not matched_l_bracket and not matched_array: 4944 break 4945 4946 matched_array = False 4947 values = self._parse_csv(self._parse_assignment) or None 4948 if ( 4949 values 4950 and not schema 4951 and ( 4952 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4953 ) 4954 ): 4955 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4956 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4957 self._retreat(index) 4958 break 4959 4960 this = exp.DataType( 4961 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4962 ) 4963 self._match(TokenType.R_BRACKET) 4964 4965 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4966 converter = self.TYPE_CONVERTERS.get(this.this) 4967 if converter: 4968 this = converter(t.cast(exp.DataType, this)) 4969 4970 return this 4971 4972 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4973 index = self._index 4974 4975 if ( 4976 self._curr 4977 and self._next 4978 and self._curr.token_type in self.TYPE_TOKENS 4979 and self._next.token_type in self.TYPE_TOKENS 4980 ): 4981 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4982 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4983 this = self._parse_id_var() 4984 else: 4985 this = ( 4986 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4987 or self._parse_id_var() 4988 ) 4989 4990 self._match(TokenType.COLON) 4991 4992 if ( 4993 type_required 4994 and not isinstance(this, exp.DataType) 4995 and not self._match_set(self.TYPE_TOKENS, advance=False) 4996 ): 4997 self._retreat(index) 4998 return self._parse_types() 4999 5000 return self._parse_column_def(this) 5001 5002 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5003 if not self._match_text_seq("AT", "TIME", "ZONE"): 5004 return this 5005 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5006 5007 def _parse_column(self) -> t.Optional[exp.Expression]: 5008 this = self._parse_column_reference() 5009 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5010 5011 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5012 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5013 5014 return column 5015 5016 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5017 this = self._parse_field() 5018 if ( 5019 not this 5020 and self._match(TokenType.VALUES, advance=False) 5021 and self.VALUES_FOLLOWED_BY_PAREN 5022 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5023 ): 5024 this = self._parse_id_var() 5025 5026 if isinstance(this, exp.Identifier): 5027 # We bubble up comments from the Identifier to the Column 5028 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5029 5030 return this 5031 5032 def _parse_colon_as_variant_extract( 5033 self, this: t.Optional[exp.Expression] 5034 ) -> t.Optional[exp.Expression]: 5035 casts = [] 5036 json_path = [] 5037 escape = None 5038 5039 while self._match(TokenType.COLON): 5040 start_index = self._index 5041 5042 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5043 path = self._parse_column_ops( 5044 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5045 ) 5046 5047 # The cast :: operator has a lower precedence than the extraction operator :, so 5048 # we rearrange the AST appropriately to avoid casting the JSON path 5049 while isinstance(path, exp.Cast): 5050 casts.append(path.to) 5051 path = path.this 5052 5053 if casts: 5054 dcolon_offset = next( 5055 i 5056 for i, t in enumerate(self._tokens[start_index:]) 5057 if t.token_type == TokenType.DCOLON 5058 ) 5059 end_token = self._tokens[start_index + dcolon_offset - 1] 5060 else: 5061 end_token = self._prev 5062 5063 if path: 5064 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5065 # it'll roundtrip to a string literal in GET_PATH 5066 if isinstance(path, exp.Identifier) and path.quoted: 5067 escape = True 5068 5069 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5070 5071 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5072 # Databricks transforms it back to the colon/dot notation 5073 if json_path: 5074 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5075 5076 if json_path_expr: 5077 json_path_expr.set("escape", escape) 5078 5079 this = self.expression( 5080 exp.JSONExtract, 5081 this=this, 5082 expression=json_path_expr, 5083 variant_extract=True, 5084 ) 5085 5086 while casts: 5087 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5088 5089 return this 5090 5091 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5092 return self._parse_types() 5093 5094 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5095 this = self._parse_bracket(this) 5096 5097 while self._match_set(self.COLUMN_OPERATORS): 5098 op_token = self._prev.token_type 5099 op = self.COLUMN_OPERATORS.get(op_token) 5100 5101 if op_token == TokenType.DCOLON: 5102 field = self._parse_dcolon() 5103 if not field: 5104 self.raise_error("Expected type") 5105 elif op and self._curr: 5106 field = self._parse_column_reference() or self._parse_bracket() 5107 else: 5108 field = self._parse_field(any_token=True, anonymous_func=True) 5109 5110 if isinstance(field, exp.Func) and this: 5111 # bigquery allows function calls like x.y.count(...) 5112 # SAFE.SUBSTR(...) 5113 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5114 this = exp.replace_tree( 5115 this, 5116 lambda n: ( 5117 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5118 if n.table 5119 else n.this 5120 ) 5121 if isinstance(n, exp.Column) 5122 else n, 5123 ) 5124 5125 if op: 5126 this = op(self, this, field) 5127 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5128 this = self.expression( 5129 exp.Column, 5130 comments=this.comments, 5131 this=field, 5132 table=this.this, 5133 db=this.args.get("table"), 5134 catalog=this.args.get("db"), 5135 ) 5136 else: 5137 this = self.expression(exp.Dot, this=this, expression=field) 5138 5139 this = self._parse_bracket(this) 5140 5141 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5142 5143 def _parse_primary(self) -> t.Optional[exp.Expression]: 5144 if self._match_set(self.PRIMARY_PARSERS): 5145 token_type = self._prev.token_type 5146 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5147 5148 if token_type == TokenType.STRING: 5149 expressions = [primary] 5150 while self._match(TokenType.STRING): 5151 expressions.append(exp.Literal.string(self._prev.text)) 5152 5153 if len(expressions) > 1: 5154 return self.expression(exp.Concat, expressions=expressions) 5155 5156 return primary 5157 5158 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5159 return exp.Literal.number(f"0.{self._prev.text}") 5160 5161 if self._match(TokenType.L_PAREN): 5162 comments = self._prev_comments 5163 query = self._parse_select() 5164 5165 if query: 5166 expressions = [query] 5167 else: 5168 expressions = self._parse_expressions() 5169 5170 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5171 5172 if not this and self._match(TokenType.R_PAREN, advance=False): 5173 this = self.expression(exp.Tuple) 5174 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5175 this = self._parse_subquery(this=this, parse_alias=False) 5176 elif isinstance(this, exp.Subquery): 5177 this = self._parse_subquery( 5178 this=self._parse_set_operations(this), parse_alias=False 5179 ) 5180 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5181 this = self.expression(exp.Tuple, expressions=expressions) 5182 else: 5183 this = self.expression(exp.Paren, this=this) 5184 5185 if this: 5186 this.add_comments(comments) 5187 5188 self._match_r_paren(expression=this) 5189 return this 5190 5191 return None 5192 5193 def _parse_field( 5194 self, 5195 any_token: bool = False, 5196 tokens: t.Optional[t.Collection[TokenType]] = None, 5197 anonymous_func: bool = False, 5198 ) -> t.Optional[exp.Expression]: 5199 if anonymous_func: 5200 field = ( 5201 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5202 or self._parse_primary() 5203 ) 5204 else: 5205 field = self._parse_primary() or self._parse_function( 5206 anonymous=anonymous_func, any_token=any_token 5207 ) 5208 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5209 5210 def _parse_function( 5211 self, 5212 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5213 anonymous: bool = False, 5214 optional_parens: bool = True, 5215 any_token: bool = False, 5216 ) -> t.Optional[exp.Expression]: 5217 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5218 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5219 fn_syntax = False 5220 if ( 5221 self._match(TokenType.L_BRACE, advance=False) 5222 and self._next 5223 and self._next.text.upper() == "FN" 5224 ): 5225 self._advance(2) 5226 fn_syntax = True 5227 5228 func = self._parse_function_call( 5229 functions=functions, 5230 anonymous=anonymous, 5231 optional_parens=optional_parens, 5232 any_token=any_token, 5233 ) 5234 5235 if fn_syntax: 5236 self._match(TokenType.R_BRACE) 5237 5238 return func 5239 5240 def _parse_function_call( 5241 self, 5242 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5243 anonymous: bool = False, 5244 optional_parens: bool = True, 5245 any_token: bool = False, 5246 ) -> t.Optional[exp.Expression]: 5247 if not self._curr: 5248 return None 5249 5250 comments = self._curr.comments 5251 token_type = self._curr.token_type 5252 this = self._curr.text 5253 upper = this.upper() 5254 5255 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5256 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5257 self._advance() 5258 return self._parse_window(parser(self)) 5259 5260 if not self._next or self._next.token_type != TokenType.L_PAREN: 5261 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5262 self._advance() 5263 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5264 5265 return None 5266 5267 if any_token: 5268 if token_type in self.RESERVED_TOKENS: 5269 return None 5270 elif token_type not in self.FUNC_TOKENS: 5271 return None 5272 5273 self._advance(2) 5274 5275 parser = self.FUNCTION_PARSERS.get(upper) 5276 if parser and not anonymous: 5277 this = parser(self) 5278 else: 5279 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5280 5281 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5282 this = self.expression( 5283 subquery_predicate, comments=comments, this=self._parse_select() 5284 ) 5285 self._match_r_paren() 5286 return this 5287 5288 if functions is None: 5289 functions = self.FUNCTIONS 5290 5291 function = functions.get(upper) 5292 5293 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5294 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5295 5296 if alias: 5297 args = self._kv_to_prop_eq(args) 5298 5299 if function and not anonymous: 5300 if "dialect" in function.__code__.co_varnames: 5301 func = function(args, dialect=self.dialect) 5302 else: 5303 func = function(args) 5304 5305 func = self.validate_expression(func, args) 5306 if not self.dialect.NORMALIZE_FUNCTIONS: 5307 func.meta["name"] = this 5308 5309 this = func 5310 else: 5311 if token_type == TokenType.IDENTIFIER: 5312 this = exp.Identifier(this=this, quoted=True) 5313 this = self.expression(exp.Anonymous, this=this, expressions=args) 5314 5315 if isinstance(this, exp.Expression): 5316 this.add_comments(comments) 5317 5318 self._match_r_paren(this) 5319 return self._parse_window(this) 5320 5321 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5322 return expression 5323 5324 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5325 transformed = [] 5326 5327 for index, e in enumerate(expressions): 5328 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5329 if isinstance(e, exp.Alias): 5330 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5331 5332 if not isinstance(e, exp.PropertyEQ): 5333 e = self.expression( 5334 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5335 ) 5336 5337 if isinstance(e.this, exp.Column): 5338 e.this.replace(e.this.this) 5339 else: 5340 e = self._to_prop_eq(e, index) 5341 5342 transformed.append(e) 5343 5344 return transformed 5345 5346 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5347 return self._parse_statement() 5348 5349 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5350 return self._parse_column_def(self._parse_id_var()) 5351 5352 def _parse_user_defined_function( 5353 self, kind: t.Optional[TokenType] = None 5354 ) -> t.Optional[exp.Expression]: 5355 this = self._parse_id_var() 5356 5357 while self._match(TokenType.DOT): 5358 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5359 5360 if not self._match(TokenType.L_PAREN): 5361 return this 5362 5363 expressions = self._parse_csv(self._parse_function_parameter) 5364 self._match_r_paren() 5365 return self.expression( 5366 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5367 ) 5368 5369 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5370 literal = self._parse_primary() 5371 if literal: 5372 return self.expression(exp.Introducer, this=token.text, expression=literal) 5373 5374 return self.expression(exp.Identifier, this=token.text) 5375 5376 def _parse_session_parameter(self) -> exp.SessionParameter: 5377 kind = None 5378 this = self._parse_id_var() or self._parse_primary() 5379 5380 if this and self._match(TokenType.DOT): 5381 kind = this.name 5382 this = self._parse_var() or self._parse_primary() 5383 5384 return self.expression(exp.SessionParameter, this=this, kind=kind) 5385 5386 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5387 return self._parse_id_var() 5388 5389 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5390 index = self._index 5391 5392 if self._match(TokenType.L_PAREN): 5393 expressions = t.cast( 5394 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5395 ) 5396 5397 if not self._match(TokenType.R_PAREN): 5398 self._retreat(index) 5399 else: 5400 expressions = [self._parse_lambda_arg()] 5401 5402 if self._match_set(self.LAMBDAS): 5403 return self.LAMBDAS[self._prev.token_type](self, expressions) 5404 5405 self._retreat(index) 5406 5407 this: t.Optional[exp.Expression] 5408 5409 if self._match(TokenType.DISTINCT): 5410 this = self.expression( 5411 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5412 ) 5413 else: 5414 this = self._parse_select_or_expression(alias=alias) 5415 5416 return self._parse_limit( 5417 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5418 ) 5419 5420 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5421 index = self._index 5422 if not self._match(TokenType.L_PAREN): 5423 return this 5424 5425 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5426 # expr can be of both types 5427 if self._match_set(self.SELECT_START_TOKENS): 5428 self._retreat(index) 5429 return this 5430 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5431 self._match_r_paren() 5432 return self.expression(exp.Schema, this=this, expressions=args) 5433 5434 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5435 return self._parse_column_def(self._parse_field(any_token=True)) 5436 5437 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5438 # column defs are not really columns, they're identifiers 5439 if isinstance(this, exp.Column): 5440 this = this.this 5441 5442 kind = self._parse_types(schema=True) 5443 5444 if self._match_text_seq("FOR", "ORDINALITY"): 5445 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5446 5447 constraints: t.List[exp.Expression] = [] 5448 5449 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5450 ("ALIAS", "MATERIALIZED") 5451 ): 5452 persisted = self._prev.text.upper() == "MATERIALIZED" 5453 constraint_kind = exp.ComputedColumnConstraint( 5454 this=self._parse_assignment(), 5455 persisted=persisted or self._match_text_seq("PERSISTED"), 5456 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5457 ) 5458 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5459 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5460 self._match(TokenType.ALIAS) 5461 constraints.append( 5462 self.expression( 5463 exp.ColumnConstraint, 5464 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5465 ) 5466 ) 5467 5468 while True: 5469 constraint = self._parse_column_constraint() 5470 if not constraint: 5471 break 5472 constraints.append(constraint) 5473 5474 if not kind and not constraints: 5475 return this 5476 5477 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5478 5479 def _parse_auto_increment( 5480 self, 5481 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5482 start = None 5483 increment = None 5484 5485 if self._match(TokenType.L_PAREN, advance=False): 5486 args = self._parse_wrapped_csv(self._parse_bitwise) 5487 start = seq_get(args, 0) 5488 increment = seq_get(args, 1) 5489 elif self._match_text_seq("START"): 5490 start = self._parse_bitwise() 5491 self._match_text_seq("INCREMENT") 5492 increment = self._parse_bitwise() 5493 5494 if start and increment: 5495 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5496 5497 return exp.AutoIncrementColumnConstraint() 5498 5499 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5500 if not self._match_text_seq("REFRESH"): 5501 self._retreat(self._index - 1) 5502 return None 5503 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5504 5505 def _parse_compress(self) -> exp.CompressColumnConstraint: 5506 if self._match(TokenType.L_PAREN, advance=False): 5507 return self.expression( 5508 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5509 ) 5510 5511 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5512 5513 def _parse_generated_as_identity( 5514 self, 5515 ) -> ( 5516 exp.GeneratedAsIdentityColumnConstraint 5517 | exp.ComputedColumnConstraint 5518 | exp.GeneratedAsRowColumnConstraint 5519 ): 5520 if self._match_text_seq("BY", "DEFAULT"): 5521 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5522 this = self.expression( 5523 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5524 ) 5525 else: 5526 self._match_text_seq("ALWAYS") 5527 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5528 5529 self._match(TokenType.ALIAS) 5530 5531 if self._match_text_seq("ROW"): 5532 start = self._match_text_seq("START") 5533 if not start: 5534 self._match(TokenType.END) 5535 hidden = self._match_text_seq("HIDDEN") 5536 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5537 5538 identity = self._match_text_seq("IDENTITY") 5539 5540 if self._match(TokenType.L_PAREN): 5541 if self._match(TokenType.START_WITH): 5542 this.set("start", self._parse_bitwise()) 5543 if self._match_text_seq("INCREMENT", "BY"): 5544 this.set("increment", self._parse_bitwise()) 5545 if self._match_text_seq("MINVALUE"): 5546 this.set("minvalue", self._parse_bitwise()) 5547 if self._match_text_seq("MAXVALUE"): 5548 this.set("maxvalue", self._parse_bitwise()) 5549 5550 if self._match_text_seq("CYCLE"): 5551 this.set("cycle", True) 5552 elif self._match_text_seq("NO", "CYCLE"): 5553 this.set("cycle", False) 5554 5555 if not identity: 5556 this.set("expression", self._parse_range()) 5557 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5558 args = self._parse_csv(self._parse_bitwise) 5559 this.set("start", seq_get(args, 0)) 5560 this.set("increment", seq_get(args, 1)) 5561 5562 self._match_r_paren() 5563 5564 return this 5565 5566 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5567 self._match_text_seq("LENGTH") 5568 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5569 5570 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5571 if self._match_text_seq("NULL"): 5572 return self.expression(exp.NotNullColumnConstraint) 5573 if self._match_text_seq("CASESPECIFIC"): 5574 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5575 if self._match_text_seq("FOR", "REPLICATION"): 5576 return self.expression(exp.NotForReplicationColumnConstraint) 5577 5578 # Unconsume the `NOT` token 5579 self._retreat(self._index - 1) 5580 return None 5581 5582 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5583 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5584 5585 procedure_option_follows = ( 5586 self._match(TokenType.WITH, advance=False) 5587 and self._next 5588 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5589 ) 5590 5591 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5592 return self.expression( 5593 exp.ColumnConstraint, 5594 this=this, 5595 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5596 ) 5597 5598 return this 5599 5600 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5601 if not self._match(TokenType.CONSTRAINT): 5602 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5603 5604 return self.expression( 5605 exp.Constraint, 5606 this=self._parse_id_var(), 5607 expressions=self._parse_unnamed_constraints(), 5608 ) 5609 5610 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5611 constraints = [] 5612 while True: 5613 constraint = self._parse_unnamed_constraint() or self._parse_function() 5614 if not constraint: 5615 break 5616 constraints.append(constraint) 5617 5618 return constraints 5619 5620 def _parse_unnamed_constraint( 5621 self, constraints: t.Optional[t.Collection[str]] = None 5622 ) -> t.Optional[exp.Expression]: 5623 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5624 constraints or self.CONSTRAINT_PARSERS 5625 ): 5626 return None 5627 5628 constraint = self._prev.text.upper() 5629 if constraint not in self.CONSTRAINT_PARSERS: 5630 self.raise_error(f"No parser found for schema constraint {constraint}.") 5631 5632 return self.CONSTRAINT_PARSERS[constraint](self) 5633 5634 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5635 return self._parse_id_var(any_token=False) 5636 5637 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5638 self._match_text_seq("KEY") 5639 return self.expression( 5640 exp.UniqueColumnConstraint, 5641 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5642 this=self._parse_schema(self._parse_unique_key()), 5643 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5644 on_conflict=self._parse_on_conflict(), 5645 ) 5646 5647 def _parse_key_constraint_options(self) -> t.List[str]: 5648 options = [] 5649 while True: 5650 if not self._curr: 5651 break 5652 5653 if self._match(TokenType.ON): 5654 action = None 5655 on = self._advance_any() and self._prev.text 5656 5657 if self._match_text_seq("NO", "ACTION"): 5658 action = "NO ACTION" 5659 elif self._match_text_seq("CASCADE"): 5660 action = "CASCADE" 5661 elif self._match_text_seq("RESTRICT"): 5662 action = "RESTRICT" 5663 elif self._match_pair(TokenType.SET, TokenType.NULL): 5664 action = "SET NULL" 5665 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5666 action = "SET DEFAULT" 5667 else: 5668 self.raise_error("Invalid key constraint") 5669 5670 options.append(f"ON {on} {action}") 5671 else: 5672 var = self._parse_var_from_options( 5673 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5674 ) 5675 if not var: 5676 break 5677 options.append(var.name) 5678 5679 return options 5680 5681 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5682 if match and not self._match(TokenType.REFERENCES): 5683 return None 5684 5685 expressions = None 5686 this = self._parse_table(schema=True) 5687 options = self._parse_key_constraint_options() 5688 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5689 5690 def _parse_foreign_key(self) -> exp.ForeignKey: 5691 expressions = self._parse_wrapped_id_vars() 5692 reference = self._parse_references() 5693 options = {} 5694 5695 while self._match(TokenType.ON): 5696 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5697 self.raise_error("Expected DELETE or UPDATE") 5698 5699 kind = self._prev.text.lower() 5700 5701 if self._match_text_seq("NO", "ACTION"): 5702 action = "NO ACTION" 5703 elif self._match(TokenType.SET): 5704 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5705 action = "SET " + self._prev.text.upper() 5706 else: 5707 self._advance() 5708 action = self._prev.text.upper() 5709 5710 options[kind] = action 5711 5712 return self.expression( 5713 exp.ForeignKey, 5714 expressions=expressions, 5715 reference=reference, 5716 **options, # type: ignore 5717 ) 5718 5719 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5720 return self._parse_field() 5721 5722 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5723 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5724 self._retreat(self._index - 1) 5725 return None 5726 5727 id_vars = self._parse_wrapped_id_vars() 5728 return self.expression( 5729 exp.PeriodForSystemTimeConstraint, 5730 this=seq_get(id_vars, 0), 5731 expression=seq_get(id_vars, 1), 5732 ) 5733 5734 def _parse_primary_key( 5735 self, wrapped_optional: bool = False, in_props: bool = False 5736 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5737 desc = ( 5738 self._match_set((TokenType.ASC, TokenType.DESC)) 5739 and self._prev.token_type == TokenType.DESC 5740 ) 5741 5742 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5743 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5744 5745 expressions = self._parse_wrapped_csv( 5746 self._parse_primary_key_part, optional=wrapped_optional 5747 ) 5748 options = self._parse_key_constraint_options() 5749 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5750 5751 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5752 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5753 5754 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5755 """ 5756 Parses a datetime column in ODBC format. We parse the column into the corresponding 5757 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5758 same as we did for `DATE('yyyy-mm-dd')`. 5759 5760 Reference: 5761 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5762 """ 5763 self._match(TokenType.VAR) 5764 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5765 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5766 if not self._match(TokenType.R_BRACE): 5767 self.raise_error("Expected }") 5768 return expression 5769 5770 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5771 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5772 return this 5773 5774 bracket_kind = self._prev.token_type 5775 if ( 5776 bracket_kind == TokenType.L_BRACE 5777 and self._curr 5778 and self._curr.token_type == TokenType.VAR 5779 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5780 ): 5781 return self._parse_odbc_datetime_literal() 5782 5783 expressions = self._parse_csv( 5784 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5785 ) 5786 5787 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5788 self.raise_error("Expected ]") 5789 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5790 self.raise_error("Expected }") 5791 5792 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5793 if bracket_kind == TokenType.L_BRACE: 5794 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5795 elif not this: 5796 this = build_array_constructor( 5797 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5798 ) 5799 else: 5800 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5801 if constructor_type: 5802 return build_array_constructor( 5803 constructor_type, 5804 args=expressions, 5805 bracket_kind=bracket_kind, 5806 dialect=self.dialect, 5807 ) 5808 5809 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5810 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5811 5812 self._add_comments(this) 5813 return self._parse_bracket(this) 5814 5815 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5816 if self._match(TokenType.COLON): 5817 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5818 return this 5819 5820 def _parse_case(self) -> t.Optional[exp.Expression]: 5821 ifs = [] 5822 default = None 5823 5824 comments = self._prev_comments 5825 expression = self._parse_assignment() 5826 5827 while self._match(TokenType.WHEN): 5828 this = self._parse_assignment() 5829 self._match(TokenType.THEN) 5830 then = self._parse_assignment() 5831 ifs.append(self.expression(exp.If, this=this, true=then)) 5832 5833 if self._match(TokenType.ELSE): 5834 default = self._parse_assignment() 5835 5836 if not self._match(TokenType.END): 5837 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5838 default = exp.column("interval") 5839 else: 5840 self.raise_error("Expected END after CASE", self._prev) 5841 5842 return self.expression( 5843 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5844 ) 5845 5846 def _parse_if(self) -> t.Optional[exp.Expression]: 5847 if self._match(TokenType.L_PAREN): 5848 args = self._parse_csv(self._parse_assignment) 5849 this = self.validate_expression(exp.If.from_arg_list(args), args) 5850 self._match_r_paren() 5851 else: 5852 index = self._index - 1 5853 5854 if self.NO_PAREN_IF_COMMANDS and index == 0: 5855 return self._parse_as_command(self._prev) 5856 5857 condition = self._parse_assignment() 5858 5859 if not condition: 5860 self._retreat(index) 5861 return None 5862 5863 self._match(TokenType.THEN) 5864 true = self._parse_assignment() 5865 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5866 self._match(TokenType.END) 5867 this = self.expression(exp.If, this=condition, true=true, false=false) 5868 5869 return this 5870 5871 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5872 if not self._match_text_seq("VALUE", "FOR"): 5873 self._retreat(self._index - 1) 5874 return None 5875 5876 return self.expression( 5877 exp.NextValueFor, 5878 this=self._parse_column(), 5879 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5880 ) 5881 5882 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5883 this = self._parse_function() or self._parse_var_or_string(upper=True) 5884 5885 if self._match(TokenType.FROM): 5886 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5887 5888 if not self._match(TokenType.COMMA): 5889 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5890 5891 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5892 5893 def _parse_gap_fill(self) -> exp.GapFill: 5894 self._match(TokenType.TABLE) 5895 this = self._parse_table() 5896 5897 self._match(TokenType.COMMA) 5898 args = [this, *self._parse_csv(self._parse_lambda)] 5899 5900 gap_fill = exp.GapFill.from_arg_list(args) 5901 return self.validate_expression(gap_fill, args) 5902 5903 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5904 this = self._parse_assignment() 5905 5906 if not self._match(TokenType.ALIAS): 5907 if self._match(TokenType.COMMA): 5908 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5909 5910 self.raise_error("Expected AS after CAST") 5911 5912 fmt = None 5913 to = self._parse_types() 5914 5915 if self._match(TokenType.FORMAT): 5916 fmt_string = self._parse_string() 5917 fmt = self._parse_at_time_zone(fmt_string) 5918 5919 if not to: 5920 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5921 if to.this in exp.DataType.TEMPORAL_TYPES: 5922 this = self.expression( 5923 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5924 this=this, 5925 format=exp.Literal.string( 5926 format_time( 5927 fmt_string.this if fmt_string else "", 5928 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5929 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5930 ) 5931 ), 5932 safe=safe, 5933 ) 5934 5935 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5936 this.set("zone", fmt.args["zone"]) 5937 return this 5938 elif not to: 5939 self.raise_error("Expected TYPE after CAST") 5940 elif isinstance(to, exp.Identifier): 5941 to = exp.DataType.build(to.name, udt=True) 5942 elif to.this == exp.DataType.Type.CHAR: 5943 if self._match(TokenType.CHARACTER_SET): 5944 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5945 5946 return self.expression( 5947 exp.Cast if strict else exp.TryCast, 5948 this=this, 5949 to=to, 5950 format=fmt, 5951 safe=safe, 5952 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5953 ) 5954 5955 def _parse_string_agg(self) -> exp.Expression: 5956 if self._match(TokenType.DISTINCT): 5957 args: t.List[t.Optional[exp.Expression]] = [ 5958 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5959 ] 5960 if self._match(TokenType.COMMA): 5961 args.extend(self._parse_csv(self._parse_assignment)) 5962 else: 5963 args = self._parse_csv(self._parse_assignment) # type: ignore 5964 5965 index = self._index 5966 if not self._match(TokenType.R_PAREN) and args: 5967 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5968 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5969 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5970 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5971 5972 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5973 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5974 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5975 if not self._match_text_seq("WITHIN", "GROUP"): 5976 self._retreat(index) 5977 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5978 5979 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5980 order = self._parse_order(this=seq_get(args, 0)) 5981 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5982 5983 def _parse_convert( 5984 self, strict: bool, safe: t.Optional[bool] = None 5985 ) -> t.Optional[exp.Expression]: 5986 this = self._parse_bitwise() 5987 5988 if self._match(TokenType.USING): 5989 to: t.Optional[exp.Expression] = self.expression( 5990 exp.CharacterSet, this=self._parse_var() 5991 ) 5992 elif self._match(TokenType.COMMA): 5993 to = self._parse_types() 5994 else: 5995 to = None 5996 5997 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5998 5999 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6000 """ 6001 There are generally two variants of the DECODE function: 6002 6003 - DECODE(bin, charset) 6004 - DECODE(expression, search, result [, search, result] ... [, default]) 6005 6006 The second variant will always be parsed into a CASE expression. Note that NULL 6007 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6008 instead of relying on pattern matching. 6009 """ 6010 args = self._parse_csv(self._parse_assignment) 6011 6012 if len(args) < 3: 6013 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6014 6015 expression, *expressions = args 6016 if not expression: 6017 return None 6018 6019 ifs = [] 6020 for search, result in zip(expressions[::2], expressions[1::2]): 6021 if not search or not result: 6022 return None 6023 6024 if isinstance(search, exp.Literal): 6025 ifs.append( 6026 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6027 ) 6028 elif isinstance(search, exp.Null): 6029 ifs.append( 6030 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6031 ) 6032 else: 6033 cond = exp.or_( 6034 exp.EQ(this=expression.copy(), expression=search), 6035 exp.and_( 6036 exp.Is(this=expression.copy(), expression=exp.Null()), 6037 exp.Is(this=search.copy(), expression=exp.Null()), 6038 copy=False, 6039 ), 6040 copy=False, 6041 ) 6042 ifs.append(exp.If(this=cond, true=result)) 6043 6044 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6045 6046 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6047 self._match_text_seq("KEY") 6048 key = self._parse_column() 6049 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6050 self._match_text_seq("VALUE") 6051 value = self._parse_bitwise() 6052 6053 if not key and not value: 6054 return None 6055 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6056 6057 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6058 if not this or not self._match_text_seq("FORMAT", "JSON"): 6059 return this 6060 6061 return self.expression(exp.FormatJson, this=this) 6062 6063 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6064 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6065 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6066 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6067 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6068 else: 6069 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6070 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6071 6072 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6073 6074 if not empty and not error and not null: 6075 return None 6076 6077 return self.expression( 6078 exp.OnCondition, 6079 empty=empty, 6080 error=error, 6081 null=null, 6082 ) 6083 6084 def _parse_on_handling( 6085 self, on: str, *values: str 6086 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6087 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6088 for value in values: 6089 if self._match_text_seq(value, "ON", on): 6090 return f"{value} ON {on}" 6091 6092 index = self._index 6093 if self._match(TokenType.DEFAULT): 6094 default_value = self._parse_bitwise() 6095 if self._match_text_seq("ON", on): 6096 return default_value 6097 6098 self._retreat(index) 6099 6100 return None 6101 6102 @t.overload 6103 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6104 6105 @t.overload 6106 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6107 6108 def _parse_json_object(self, agg=False): 6109 star = self._parse_star() 6110 expressions = ( 6111 [star] 6112 if star 6113 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6114 ) 6115 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6116 6117 unique_keys = None 6118 if self._match_text_seq("WITH", "UNIQUE"): 6119 unique_keys = True 6120 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6121 unique_keys = False 6122 6123 self._match_text_seq("KEYS") 6124 6125 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6126 self._parse_type() 6127 ) 6128 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6129 6130 return self.expression( 6131 exp.JSONObjectAgg if agg else exp.JSONObject, 6132 expressions=expressions, 6133 null_handling=null_handling, 6134 unique_keys=unique_keys, 6135 return_type=return_type, 6136 encoding=encoding, 6137 ) 6138 6139 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6140 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6141 if not self._match_text_seq("NESTED"): 6142 this = self._parse_id_var() 6143 kind = self._parse_types(allow_identifiers=False) 6144 nested = None 6145 else: 6146 this = None 6147 kind = None 6148 nested = True 6149 6150 path = self._match_text_seq("PATH") and self._parse_string() 6151 nested_schema = nested and self._parse_json_schema() 6152 6153 return self.expression( 6154 exp.JSONColumnDef, 6155 this=this, 6156 kind=kind, 6157 path=path, 6158 nested_schema=nested_schema, 6159 ) 6160 6161 def _parse_json_schema(self) -> exp.JSONSchema: 6162 self._match_text_seq("COLUMNS") 6163 return self.expression( 6164 exp.JSONSchema, 6165 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6166 ) 6167 6168 def _parse_json_table(self) -> exp.JSONTable: 6169 this = self._parse_format_json(self._parse_bitwise()) 6170 path = self._match(TokenType.COMMA) and self._parse_string() 6171 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6172 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6173 schema = self._parse_json_schema() 6174 6175 return exp.JSONTable( 6176 this=this, 6177 schema=schema, 6178 path=path, 6179 error_handling=error_handling, 6180 empty_handling=empty_handling, 6181 ) 6182 6183 def _parse_match_against(self) -> exp.MatchAgainst: 6184 expressions = self._parse_csv(self._parse_column) 6185 6186 self._match_text_seq(")", "AGAINST", "(") 6187 6188 this = self._parse_string() 6189 6190 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6191 modifier = "IN NATURAL LANGUAGE MODE" 6192 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6193 modifier = f"{modifier} WITH QUERY EXPANSION" 6194 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6195 modifier = "IN BOOLEAN MODE" 6196 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6197 modifier = "WITH QUERY EXPANSION" 6198 else: 6199 modifier = None 6200 6201 return self.expression( 6202 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6203 ) 6204 6205 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6206 def _parse_open_json(self) -> exp.OpenJSON: 6207 this = self._parse_bitwise() 6208 path = self._match(TokenType.COMMA) and self._parse_string() 6209 6210 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6211 this = self._parse_field(any_token=True) 6212 kind = self._parse_types() 6213 path = self._parse_string() 6214 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6215 6216 return self.expression( 6217 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6218 ) 6219 6220 expressions = None 6221 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6222 self._match_l_paren() 6223 expressions = self._parse_csv(_parse_open_json_column_def) 6224 6225 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6226 6227 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6228 args = self._parse_csv(self._parse_bitwise) 6229 6230 if self._match(TokenType.IN): 6231 return self.expression( 6232 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6233 ) 6234 6235 if haystack_first: 6236 haystack = seq_get(args, 0) 6237 needle = seq_get(args, 1) 6238 else: 6239 needle = seq_get(args, 0) 6240 haystack = seq_get(args, 1) 6241 6242 return self.expression( 6243 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6244 ) 6245 6246 def _parse_predict(self) -> exp.Predict: 6247 self._match_text_seq("MODEL") 6248 this = self._parse_table() 6249 6250 self._match(TokenType.COMMA) 6251 self._match_text_seq("TABLE") 6252 6253 return self.expression( 6254 exp.Predict, 6255 this=this, 6256 expression=self._parse_table(), 6257 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6258 ) 6259 6260 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6261 args = self._parse_csv(self._parse_table) 6262 return exp.JoinHint(this=func_name.upper(), expressions=args) 6263 6264 def _parse_substring(self) -> exp.Substring: 6265 # Postgres supports the form: substring(string [from int] [for int]) 6266 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6267 6268 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6269 6270 if self._match(TokenType.FROM): 6271 args.append(self._parse_bitwise()) 6272 if self._match(TokenType.FOR): 6273 if len(args) == 1: 6274 args.append(exp.Literal.number(1)) 6275 args.append(self._parse_bitwise()) 6276 6277 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6278 6279 def _parse_trim(self) -> exp.Trim: 6280 # https://www.w3resource.com/sql/character-functions/trim.php 6281 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6282 6283 position = None 6284 collation = None 6285 expression = None 6286 6287 if self._match_texts(self.TRIM_TYPES): 6288 position = self._prev.text.upper() 6289 6290 this = self._parse_bitwise() 6291 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6292 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6293 expression = self._parse_bitwise() 6294 6295 if invert_order: 6296 this, expression = expression, this 6297 6298 if self._match(TokenType.COLLATE): 6299 collation = self._parse_bitwise() 6300 6301 return self.expression( 6302 exp.Trim, this=this, position=position, expression=expression, collation=collation 6303 ) 6304 6305 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6306 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6307 6308 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6309 return self._parse_window(self._parse_id_var(), alias=True) 6310 6311 def _parse_respect_or_ignore_nulls( 6312 self, this: t.Optional[exp.Expression] 6313 ) -> t.Optional[exp.Expression]: 6314 if self._match_text_seq("IGNORE", "NULLS"): 6315 return self.expression(exp.IgnoreNulls, this=this) 6316 if self._match_text_seq("RESPECT", "NULLS"): 6317 return self.expression(exp.RespectNulls, this=this) 6318 return this 6319 6320 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6321 if self._match(TokenType.HAVING): 6322 self._match_texts(("MAX", "MIN")) 6323 max = self._prev.text.upper() != "MIN" 6324 return self.expression( 6325 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6326 ) 6327 6328 return this 6329 6330 def _parse_window( 6331 self, this: t.Optional[exp.Expression], alias: bool = False 6332 ) -> t.Optional[exp.Expression]: 6333 func = this 6334 comments = func.comments if isinstance(func, exp.Expression) else None 6335 6336 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6337 self._match(TokenType.WHERE) 6338 this = self.expression( 6339 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6340 ) 6341 self._match_r_paren() 6342 6343 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6344 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6345 if self._match_text_seq("WITHIN", "GROUP"): 6346 order = self._parse_wrapped(self._parse_order) 6347 this = self.expression(exp.WithinGroup, this=this, expression=order) 6348 6349 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6350 # Some dialects choose to implement and some do not. 6351 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6352 6353 # There is some code above in _parse_lambda that handles 6354 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6355 6356 # The below changes handle 6357 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6358 6359 # Oracle allows both formats 6360 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6361 # and Snowflake chose to do the same for familiarity 6362 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6363 if isinstance(this, exp.AggFunc): 6364 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6365 6366 if ignore_respect and ignore_respect is not this: 6367 ignore_respect.replace(ignore_respect.this) 6368 this = self.expression(ignore_respect.__class__, this=this) 6369 6370 this = self._parse_respect_or_ignore_nulls(this) 6371 6372 # bigquery select from window x AS (partition by ...) 6373 if alias: 6374 over = None 6375 self._match(TokenType.ALIAS) 6376 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6377 return this 6378 else: 6379 over = self._prev.text.upper() 6380 6381 if comments and isinstance(func, exp.Expression): 6382 func.pop_comments() 6383 6384 if not self._match(TokenType.L_PAREN): 6385 return self.expression( 6386 exp.Window, 6387 comments=comments, 6388 this=this, 6389 alias=self._parse_id_var(False), 6390 over=over, 6391 ) 6392 6393 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6394 6395 first = self._match(TokenType.FIRST) 6396 if self._match_text_seq("LAST"): 6397 first = False 6398 6399 partition, order = self._parse_partition_and_order() 6400 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6401 6402 if kind: 6403 self._match(TokenType.BETWEEN) 6404 start = self._parse_window_spec() 6405 self._match(TokenType.AND) 6406 end = self._parse_window_spec() 6407 6408 spec = self.expression( 6409 exp.WindowSpec, 6410 kind=kind, 6411 start=start["value"], 6412 start_side=start["side"], 6413 end=end["value"], 6414 end_side=end["side"], 6415 ) 6416 else: 6417 spec = None 6418 6419 self._match_r_paren() 6420 6421 window = self.expression( 6422 exp.Window, 6423 comments=comments, 6424 this=this, 6425 partition_by=partition, 6426 order=order, 6427 spec=spec, 6428 alias=window_alias, 6429 over=over, 6430 first=first, 6431 ) 6432 6433 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6434 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6435 return self._parse_window(window, alias=alias) 6436 6437 return window 6438 6439 def _parse_partition_and_order( 6440 self, 6441 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6442 return self._parse_partition_by(), self._parse_order() 6443 6444 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6445 self._match(TokenType.BETWEEN) 6446 6447 return { 6448 "value": ( 6449 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6450 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6451 or self._parse_bitwise() 6452 ), 6453 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6454 } 6455 6456 def _parse_alias( 6457 self, this: t.Optional[exp.Expression], explicit: bool = False 6458 ) -> t.Optional[exp.Expression]: 6459 any_token = self._match(TokenType.ALIAS) 6460 comments = self._prev_comments or [] 6461 6462 if explicit and not any_token: 6463 return this 6464 6465 if self._match(TokenType.L_PAREN): 6466 aliases = self.expression( 6467 exp.Aliases, 6468 comments=comments, 6469 this=this, 6470 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6471 ) 6472 self._match_r_paren(aliases) 6473 return aliases 6474 6475 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6476 self.STRING_ALIASES and self._parse_string_as_identifier() 6477 ) 6478 6479 if alias: 6480 comments.extend(alias.pop_comments()) 6481 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6482 column = this.this 6483 6484 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6485 if not this.comments and column and column.comments: 6486 this.comments = column.pop_comments() 6487 6488 return this 6489 6490 def _parse_id_var( 6491 self, 6492 any_token: bool = True, 6493 tokens: t.Optional[t.Collection[TokenType]] = None, 6494 ) -> t.Optional[exp.Expression]: 6495 expression = self._parse_identifier() 6496 if not expression and ( 6497 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6498 ): 6499 quoted = self._prev.token_type == TokenType.STRING 6500 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6501 6502 return expression 6503 6504 def _parse_string(self) -> t.Optional[exp.Expression]: 6505 if self._match_set(self.STRING_PARSERS): 6506 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6507 return self._parse_placeholder() 6508 6509 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6510 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6511 6512 def _parse_number(self) -> t.Optional[exp.Expression]: 6513 if self._match_set(self.NUMERIC_PARSERS): 6514 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6515 return self._parse_placeholder() 6516 6517 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6518 if self._match(TokenType.IDENTIFIER): 6519 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6520 return self._parse_placeholder() 6521 6522 def _parse_var( 6523 self, 6524 any_token: bool = False, 6525 tokens: t.Optional[t.Collection[TokenType]] = None, 6526 upper: bool = False, 6527 ) -> t.Optional[exp.Expression]: 6528 if ( 6529 (any_token and self._advance_any()) 6530 or self._match(TokenType.VAR) 6531 or (self._match_set(tokens) if tokens else False) 6532 ): 6533 return self.expression( 6534 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6535 ) 6536 return self._parse_placeholder() 6537 6538 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6539 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6540 self._advance() 6541 return self._prev 6542 return None 6543 6544 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6545 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6546 6547 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6548 return self._parse_primary() or self._parse_var(any_token=True) 6549 6550 def _parse_null(self) -> t.Optional[exp.Expression]: 6551 if self._match_set(self.NULL_TOKENS): 6552 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6553 return self._parse_placeholder() 6554 6555 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6556 if self._match(TokenType.TRUE): 6557 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6558 if self._match(TokenType.FALSE): 6559 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6560 return self._parse_placeholder() 6561 6562 def _parse_star(self) -> t.Optional[exp.Expression]: 6563 if self._match(TokenType.STAR): 6564 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6565 return self._parse_placeholder() 6566 6567 def _parse_parameter(self) -> exp.Parameter: 6568 this = self._parse_identifier() or self._parse_primary_or_var() 6569 return self.expression(exp.Parameter, this=this) 6570 6571 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6572 if self._match_set(self.PLACEHOLDER_PARSERS): 6573 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6574 if placeholder: 6575 return placeholder 6576 self._advance(-1) 6577 return None 6578 6579 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6580 if not self._match_texts(keywords): 6581 return None 6582 if self._match(TokenType.L_PAREN, advance=False): 6583 return self._parse_wrapped_csv(self._parse_expression) 6584 6585 expression = self._parse_expression() 6586 return [expression] if expression else None 6587 6588 def _parse_csv( 6589 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6590 ) -> t.List[exp.Expression]: 6591 parse_result = parse_method() 6592 items = [parse_result] if parse_result is not None else [] 6593 6594 while self._match(sep): 6595 self._add_comments(parse_result) 6596 parse_result = parse_method() 6597 if parse_result is not None: 6598 items.append(parse_result) 6599 6600 return items 6601 6602 def _parse_tokens( 6603 self, parse_method: t.Callable, expressions: t.Dict 6604 ) -> t.Optional[exp.Expression]: 6605 this = parse_method() 6606 6607 while self._match_set(expressions): 6608 this = self.expression( 6609 expressions[self._prev.token_type], 6610 this=this, 6611 comments=self._prev_comments, 6612 expression=parse_method(), 6613 ) 6614 6615 return this 6616 6617 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6618 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6619 6620 def _parse_wrapped_csv( 6621 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6622 ) -> t.List[exp.Expression]: 6623 return self._parse_wrapped( 6624 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6625 ) 6626 6627 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6628 wrapped = self._match(TokenType.L_PAREN) 6629 if not wrapped and not optional: 6630 self.raise_error("Expecting (") 6631 parse_result = parse_method() 6632 if wrapped: 6633 self._match_r_paren() 6634 return parse_result 6635 6636 def _parse_expressions(self) -> t.List[exp.Expression]: 6637 return self._parse_csv(self._parse_expression) 6638 6639 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6640 return self._parse_select() or self._parse_set_operations( 6641 self._parse_expression() if alias else self._parse_assignment() 6642 ) 6643 6644 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6645 return self._parse_query_modifiers( 6646 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6647 ) 6648 6649 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6650 this = None 6651 if self._match_texts(self.TRANSACTION_KIND): 6652 this = self._prev.text 6653 6654 self._match_texts(("TRANSACTION", "WORK")) 6655 6656 modes = [] 6657 while True: 6658 mode = [] 6659 while self._match(TokenType.VAR): 6660 mode.append(self._prev.text) 6661 6662 if mode: 6663 modes.append(" ".join(mode)) 6664 if not self._match(TokenType.COMMA): 6665 break 6666 6667 return self.expression(exp.Transaction, this=this, modes=modes) 6668 6669 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6670 chain = None 6671 savepoint = None 6672 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6673 6674 self._match_texts(("TRANSACTION", "WORK")) 6675 6676 if self._match_text_seq("TO"): 6677 self._match_text_seq("SAVEPOINT") 6678 savepoint = self._parse_id_var() 6679 6680 if self._match(TokenType.AND): 6681 chain = not self._match_text_seq("NO") 6682 self._match_text_seq("CHAIN") 6683 6684 if is_rollback: 6685 return self.expression(exp.Rollback, savepoint=savepoint) 6686 6687 return self.expression(exp.Commit, chain=chain) 6688 6689 def _parse_refresh(self) -> exp.Refresh: 6690 self._match(TokenType.TABLE) 6691 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6692 6693 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6694 if not self._match_text_seq("ADD"): 6695 return None 6696 6697 self._match(TokenType.COLUMN) 6698 exists_column = self._parse_exists(not_=True) 6699 expression = self._parse_field_def() 6700 6701 if expression: 6702 expression.set("exists", exists_column) 6703 6704 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6705 if self._match_texts(("FIRST", "AFTER")): 6706 position = self._prev.text 6707 column_position = self.expression( 6708 exp.ColumnPosition, this=self._parse_column(), position=position 6709 ) 6710 expression.set("position", column_position) 6711 6712 return expression 6713 6714 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6715 drop = self._match(TokenType.DROP) and self._parse_drop() 6716 if drop and not isinstance(drop, exp.Command): 6717 drop.set("kind", drop.args.get("kind", "COLUMN")) 6718 return drop 6719 6720 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6721 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6722 return self.expression( 6723 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6724 ) 6725 6726 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6727 index = self._index - 1 6728 6729 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6730 return self._parse_csv( 6731 lambda: self.expression( 6732 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6733 ) 6734 ) 6735 6736 self._retreat(index) 6737 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6738 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6739 6740 if self._match_text_seq("ADD", "COLUMNS"): 6741 schema = self._parse_schema() 6742 if schema: 6743 return [schema] 6744 return [] 6745 6746 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6747 6748 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6749 if self._match_texts(self.ALTER_ALTER_PARSERS): 6750 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6751 6752 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6753 # keyword after ALTER we default to parsing this statement 6754 self._match(TokenType.COLUMN) 6755 column = self._parse_field(any_token=True) 6756 6757 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6758 return self.expression(exp.AlterColumn, this=column, drop=True) 6759 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6760 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6761 if self._match(TokenType.COMMENT): 6762 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6763 if self._match_text_seq("DROP", "NOT", "NULL"): 6764 return self.expression( 6765 exp.AlterColumn, 6766 this=column, 6767 drop=True, 6768 allow_null=True, 6769 ) 6770 if self._match_text_seq("SET", "NOT", "NULL"): 6771 return self.expression( 6772 exp.AlterColumn, 6773 this=column, 6774 allow_null=False, 6775 ) 6776 self._match_text_seq("SET", "DATA") 6777 self._match_text_seq("TYPE") 6778 return self.expression( 6779 exp.AlterColumn, 6780 this=column, 6781 dtype=self._parse_types(), 6782 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6783 using=self._match(TokenType.USING) and self._parse_assignment(), 6784 ) 6785 6786 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6787 if self._match_texts(("ALL", "EVEN", "AUTO")): 6788 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6789 6790 self._match_text_seq("KEY", "DISTKEY") 6791 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6792 6793 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6794 if compound: 6795 self._match_text_seq("SORTKEY") 6796 6797 if self._match(TokenType.L_PAREN, advance=False): 6798 return self.expression( 6799 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6800 ) 6801 6802 self._match_texts(("AUTO", "NONE")) 6803 return self.expression( 6804 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6805 ) 6806 6807 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6808 index = self._index - 1 6809 6810 partition_exists = self._parse_exists() 6811 if self._match(TokenType.PARTITION, advance=False): 6812 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6813 6814 self._retreat(index) 6815 return self._parse_csv(self._parse_drop_column) 6816 6817 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6818 if self._match(TokenType.COLUMN): 6819 exists = self._parse_exists() 6820 old_column = self._parse_column() 6821 to = self._match_text_seq("TO") 6822 new_column = self._parse_column() 6823 6824 if old_column is None or to is None or new_column is None: 6825 return None 6826 6827 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6828 6829 self._match_text_seq("TO") 6830 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6831 6832 def _parse_alter_table_set(self) -> exp.AlterSet: 6833 alter_set = self.expression(exp.AlterSet) 6834 6835 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6836 "TABLE", "PROPERTIES" 6837 ): 6838 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6839 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6840 alter_set.set("expressions", [self._parse_assignment()]) 6841 elif self._match_texts(("LOGGED", "UNLOGGED")): 6842 alter_set.set("option", exp.var(self._prev.text.upper())) 6843 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6844 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6845 elif self._match_text_seq("LOCATION"): 6846 alter_set.set("location", self._parse_field()) 6847 elif self._match_text_seq("ACCESS", "METHOD"): 6848 alter_set.set("access_method", self._parse_field()) 6849 elif self._match_text_seq("TABLESPACE"): 6850 alter_set.set("tablespace", self._parse_field()) 6851 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6852 alter_set.set("file_format", [self._parse_field()]) 6853 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6854 alter_set.set("file_format", self._parse_wrapped_options()) 6855 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6856 alter_set.set("copy_options", self._parse_wrapped_options()) 6857 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6858 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6859 else: 6860 if self._match_text_seq("SERDE"): 6861 alter_set.set("serde", self._parse_field()) 6862 6863 alter_set.set("expressions", [self._parse_properties()]) 6864 6865 return alter_set 6866 6867 def _parse_alter(self) -> exp.Alter | exp.Command: 6868 start = self._prev 6869 6870 alter_token = self._match_set(self.ALTERABLES) and self._prev 6871 if not alter_token: 6872 return self._parse_as_command(start) 6873 6874 exists = self._parse_exists() 6875 only = self._match_text_seq("ONLY") 6876 this = self._parse_table(schema=True) 6877 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6878 6879 if self._next: 6880 self._advance() 6881 6882 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6883 if parser: 6884 actions = ensure_list(parser(self)) 6885 not_valid = self._match_text_seq("NOT", "VALID") 6886 options = self._parse_csv(self._parse_property) 6887 6888 if not self._curr and actions: 6889 return self.expression( 6890 exp.Alter, 6891 this=this, 6892 kind=alter_token.text.upper(), 6893 exists=exists, 6894 actions=actions, 6895 only=only, 6896 options=options, 6897 cluster=cluster, 6898 not_valid=not_valid, 6899 ) 6900 6901 return self._parse_as_command(start) 6902 6903 def _parse_merge(self) -> exp.Merge: 6904 self._match(TokenType.INTO) 6905 target = self._parse_table() 6906 6907 if target and self._match(TokenType.ALIAS, advance=False): 6908 target.set("alias", self._parse_table_alias()) 6909 6910 self._match(TokenType.USING) 6911 using = self._parse_table() 6912 6913 self._match(TokenType.ON) 6914 on = self._parse_assignment() 6915 6916 return self.expression( 6917 exp.Merge, 6918 this=target, 6919 using=using, 6920 on=on, 6921 expressions=self._parse_when_matched(), 6922 returning=self._parse_returning(), 6923 ) 6924 6925 def _parse_when_matched(self) -> t.List[exp.When]: 6926 whens = [] 6927 6928 while self._match(TokenType.WHEN): 6929 matched = not self._match(TokenType.NOT) 6930 self._match_text_seq("MATCHED") 6931 source = ( 6932 False 6933 if self._match_text_seq("BY", "TARGET") 6934 else self._match_text_seq("BY", "SOURCE") 6935 ) 6936 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6937 6938 self._match(TokenType.THEN) 6939 6940 if self._match(TokenType.INSERT): 6941 this = self._parse_star() 6942 if this: 6943 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6944 else: 6945 then = self.expression( 6946 exp.Insert, 6947 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6948 expression=self._match_text_seq("VALUES") and self._parse_value(), 6949 ) 6950 elif self._match(TokenType.UPDATE): 6951 expressions = self._parse_star() 6952 if expressions: 6953 then = self.expression(exp.Update, expressions=expressions) 6954 else: 6955 then = self.expression( 6956 exp.Update, 6957 expressions=self._match(TokenType.SET) 6958 and self._parse_csv(self._parse_equality), 6959 ) 6960 elif self._match(TokenType.DELETE): 6961 then = self.expression(exp.Var, this=self._prev.text) 6962 else: 6963 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6964 6965 whens.append( 6966 self.expression( 6967 exp.When, 6968 matched=matched, 6969 source=source, 6970 condition=condition, 6971 then=then, 6972 ) 6973 ) 6974 return whens 6975 6976 def _parse_show(self) -> t.Optional[exp.Expression]: 6977 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6978 if parser: 6979 return parser(self) 6980 return self._parse_as_command(self._prev) 6981 6982 def _parse_set_item_assignment( 6983 self, kind: t.Optional[str] = None 6984 ) -> t.Optional[exp.Expression]: 6985 index = self._index 6986 6987 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6988 return self._parse_set_transaction(global_=kind == "GLOBAL") 6989 6990 left = self._parse_primary() or self._parse_column() 6991 assignment_delimiter = self._match_texts(("=", "TO")) 6992 6993 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6994 self._retreat(index) 6995 return None 6996 6997 right = self._parse_statement() or self._parse_id_var() 6998 if isinstance(right, (exp.Column, exp.Identifier)): 6999 right = exp.var(right.name) 7000 7001 this = self.expression(exp.EQ, this=left, expression=right) 7002 return self.expression(exp.SetItem, this=this, kind=kind) 7003 7004 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7005 self._match_text_seq("TRANSACTION") 7006 characteristics = self._parse_csv( 7007 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7008 ) 7009 return self.expression( 7010 exp.SetItem, 7011 expressions=characteristics, 7012 kind="TRANSACTION", 7013 **{"global": global_}, # type: ignore 7014 ) 7015 7016 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7017 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7018 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7019 7020 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7021 index = self._index 7022 set_ = self.expression( 7023 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7024 ) 7025 7026 if self._curr: 7027 self._retreat(index) 7028 return self._parse_as_command(self._prev) 7029 7030 return set_ 7031 7032 def _parse_var_from_options( 7033 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7034 ) -> t.Optional[exp.Var]: 7035 start = self._curr 7036 if not start: 7037 return None 7038 7039 option = start.text.upper() 7040 continuations = options.get(option) 7041 7042 index = self._index 7043 self._advance() 7044 for keywords in continuations or []: 7045 if isinstance(keywords, str): 7046 keywords = (keywords,) 7047 7048 if self._match_text_seq(*keywords): 7049 option = f"{option} {' '.join(keywords)}" 7050 break 7051 else: 7052 if continuations or continuations is None: 7053 if raise_unmatched: 7054 self.raise_error(f"Unknown option {option}") 7055 7056 self._retreat(index) 7057 return None 7058 7059 return exp.var(option) 7060 7061 def _parse_as_command(self, start: Token) -> exp.Command: 7062 while self._curr: 7063 self._advance() 7064 text = self._find_sql(start, self._prev) 7065 size = len(start.text) 7066 self._warn_unsupported() 7067 return exp.Command(this=text[:size], expression=text[size:]) 7068 7069 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7070 settings = [] 7071 7072 self._match_l_paren() 7073 kind = self._parse_id_var() 7074 7075 if self._match(TokenType.L_PAREN): 7076 while True: 7077 key = self._parse_id_var() 7078 value = self._parse_primary() 7079 7080 if not key and value is None: 7081 break 7082 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7083 self._match(TokenType.R_PAREN) 7084 7085 self._match_r_paren() 7086 7087 return self.expression( 7088 exp.DictProperty, 7089 this=this, 7090 kind=kind.this if kind else None, 7091 settings=settings, 7092 ) 7093 7094 def _parse_dict_range(self, this: str) -> exp.DictRange: 7095 self._match_l_paren() 7096 has_min = self._match_text_seq("MIN") 7097 if has_min: 7098 min = self._parse_var() or self._parse_primary() 7099 self._match_text_seq("MAX") 7100 max = self._parse_var() or self._parse_primary() 7101 else: 7102 max = self._parse_var() or self._parse_primary() 7103 min = exp.Literal.number(0) 7104 self._match_r_paren() 7105 return self.expression(exp.DictRange, this=this, min=min, max=max) 7106 7107 def _parse_comprehension( 7108 self, this: t.Optional[exp.Expression] 7109 ) -> t.Optional[exp.Comprehension]: 7110 index = self._index 7111 expression = self._parse_column() 7112 if not self._match(TokenType.IN): 7113 self._retreat(index - 1) 7114 return None 7115 iterator = self._parse_column() 7116 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7117 return self.expression( 7118 exp.Comprehension, 7119 this=this, 7120 expression=expression, 7121 iterator=iterator, 7122 condition=condition, 7123 ) 7124 7125 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7126 if self._match(TokenType.HEREDOC_STRING): 7127 return self.expression(exp.Heredoc, this=self._prev.text) 7128 7129 if not self._match_text_seq("$"): 7130 return None 7131 7132 tags = ["$"] 7133 tag_text = None 7134 7135 if self._is_connected(): 7136 self._advance() 7137 tags.append(self._prev.text.upper()) 7138 else: 7139 self.raise_error("No closing $ found") 7140 7141 if tags[-1] != "$": 7142 if self._is_connected() and self._match_text_seq("$"): 7143 tag_text = tags[-1] 7144 tags.append("$") 7145 else: 7146 self.raise_error("No closing $ found") 7147 7148 heredoc_start = self._curr 7149 7150 while self._curr: 7151 if self._match_text_seq(*tags, advance=False): 7152 this = self._find_sql(heredoc_start, self._prev) 7153 self._advance(len(tags)) 7154 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7155 7156 self._advance() 7157 7158 self.raise_error(f"No closing {''.join(tags)} found") 7159 return None 7160 7161 def _find_parser( 7162 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7163 ) -> t.Optional[t.Callable]: 7164 if not self._curr: 7165 return None 7166 7167 index = self._index 7168 this = [] 7169 while True: 7170 # The current token might be multiple words 7171 curr = self._curr.text.upper() 7172 key = curr.split(" ") 7173 this.append(curr) 7174 7175 self._advance() 7176 result, trie = in_trie(trie, key) 7177 if result == TrieResult.FAILED: 7178 break 7179 7180 if result == TrieResult.EXISTS: 7181 subparser = parsers[" ".join(this)] 7182 return subparser 7183 7184 self._retreat(index) 7185 return None 7186 7187 def _match(self, token_type, advance=True, expression=None): 7188 if not self._curr: 7189 return None 7190 7191 if self._curr.token_type == token_type: 7192 if advance: 7193 self._advance() 7194 self._add_comments(expression) 7195 return True 7196 7197 return None 7198 7199 def _match_set(self, types, advance=True): 7200 if not self._curr: 7201 return None 7202 7203 if self._curr.token_type in types: 7204 if advance: 7205 self._advance() 7206 return True 7207 7208 return None 7209 7210 def _match_pair(self, token_type_a, token_type_b, advance=True): 7211 if not self._curr or not self._next: 7212 return None 7213 7214 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7215 if advance: 7216 self._advance(2) 7217 return True 7218 7219 return None 7220 7221 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7222 if not self._match(TokenType.L_PAREN, expression=expression): 7223 self.raise_error("Expecting (") 7224 7225 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7226 if not self._match(TokenType.R_PAREN, expression=expression): 7227 self.raise_error("Expecting )") 7228 7229 def _match_texts(self, texts, advance=True): 7230 if ( 7231 self._curr 7232 and self._curr.token_type != TokenType.STRING 7233 and self._curr.text.upper() in texts 7234 ): 7235 if advance: 7236 self._advance() 7237 return True 7238 return None 7239 7240 def _match_text_seq(self, *texts, advance=True): 7241 index = self._index 7242 for text in texts: 7243 if ( 7244 self._curr 7245 and self._curr.token_type != TokenType.STRING 7246 and self._curr.text.upper() == text 7247 ): 7248 self._advance() 7249 else: 7250 self._retreat(index) 7251 return None 7252 7253 if not advance: 7254 self._retreat(index) 7255 7256 return True 7257 7258 def _replace_lambda( 7259 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7260 ) -> t.Optional[exp.Expression]: 7261 if not node: 7262 return node 7263 7264 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7265 7266 for column in node.find_all(exp.Column): 7267 typ = lambda_types.get(column.parts[0].name) 7268 if typ is not None: 7269 dot_or_id = column.to_dot() if column.table else column.this 7270 7271 if typ: 7272 dot_or_id = self.expression( 7273 exp.Cast, 7274 this=dot_or_id, 7275 to=typ, 7276 ) 7277 7278 parent = column.parent 7279 7280 while isinstance(parent, exp.Dot): 7281 if not isinstance(parent.parent, exp.Dot): 7282 parent.replace(dot_or_id) 7283 break 7284 parent = parent.parent 7285 else: 7286 if column is node: 7287 node = dot_or_id 7288 else: 7289 column.replace(dot_or_id) 7290 return node 7291 7292 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7293 start = self._prev 7294 7295 # Not to be confused with TRUNCATE(number, decimals) function call 7296 if self._match(TokenType.L_PAREN): 7297 self._retreat(self._index - 2) 7298 return self._parse_function() 7299 7300 # Clickhouse supports TRUNCATE DATABASE as well 7301 is_database = self._match(TokenType.DATABASE) 7302 7303 self._match(TokenType.TABLE) 7304 7305 exists = self._parse_exists(not_=False) 7306 7307 expressions = self._parse_csv( 7308 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7309 ) 7310 7311 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7312 7313 if self._match_text_seq("RESTART", "IDENTITY"): 7314 identity = "RESTART" 7315 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7316 identity = "CONTINUE" 7317 else: 7318 identity = None 7319 7320 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7321 option = self._prev.text 7322 else: 7323 option = None 7324 7325 partition = self._parse_partition() 7326 7327 # Fallback case 7328 if self._curr: 7329 return self._parse_as_command(start) 7330 7331 return self.expression( 7332 exp.TruncateTable, 7333 expressions=expressions, 7334 is_database=is_database, 7335 exists=exists, 7336 cluster=cluster, 7337 identity=identity, 7338 option=option, 7339 partition=partition, 7340 ) 7341 7342 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7343 this = self._parse_ordered(self._parse_opclass) 7344 7345 if not self._match(TokenType.WITH): 7346 return this 7347 7348 op = self._parse_var(any_token=True) 7349 7350 return self.expression(exp.WithOperator, this=this, op=op) 7351 7352 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7353 self._match(TokenType.EQ) 7354 self._match(TokenType.L_PAREN) 7355 7356 opts: t.List[t.Optional[exp.Expression]] = [] 7357 while self._curr and not self._match(TokenType.R_PAREN): 7358 if self._match_text_seq("FORMAT_NAME", "="): 7359 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7360 # so we parse it separately to use _parse_field() 7361 prop = self.expression( 7362 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7363 ) 7364 opts.append(prop) 7365 else: 7366 opts.append(self._parse_property()) 7367 7368 self._match(TokenType.COMMA) 7369 7370 return opts 7371 7372 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7373 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7374 7375 options = [] 7376 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7377 option = self._parse_var(any_token=True) 7378 prev = self._prev.text.upper() 7379 7380 # Different dialects might separate options and values by white space, "=" and "AS" 7381 self._match(TokenType.EQ) 7382 self._match(TokenType.ALIAS) 7383 7384 param = self.expression(exp.CopyParameter, this=option) 7385 7386 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7387 TokenType.L_PAREN, advance=False 7388 ): 7389 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7390 param.set("expressions", self._parse_wrapped_options()) 7391 elif prev == "FILE_FORMAT": 7392 # T-SQL's external file format case 7393 param.set("expression", self._parse_field()) 7394 else: 7395 param.set("expression", self._parse_unquoted_field()) 7396 7397 options.append(param) 7398 self._match(sep) 7399 7400 return options 7401 7402 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7403 expr = self.expression(exp.Credentials) 7404 7405 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7406 expr.set("storage", self._parse_field()) 7407 if self._match_text_seq("CREDENTIALS"): 7408 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7409 creds = ( 7410 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7411 ) 7412 expr.set("credentials", creds) 7413 if self._match_text_seq("ENCRYPTION"): 7414 expr.set("encryption", self._parse_wrapped_options()) 7415 if self._match_text_seq("IAM_ROLE"): 7416 expr.set("iam_role", self._parse_field()) 7417 if self._match_text_seq("REGION"): 7418 expr.set("region", self._parse_field()) 7419 7420 return expr 7421 7422 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7423 return self._parse_field() 7424 7425 def _parse_copy(self) -> exp.Copy | exp.Command: 7426 start = self._prev 7427 7428 self._match(TokenType.INTO) 7429 7430 this = ( 7431 self._parse_select(nested=True, parse_subquery_alias=False) 7432 if self._match(TokenType.L_PAREN, advance=False) 7433 else self._parse_table(schema=True) 7434 ) 7435 7436 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7437 7438 files = self._parse_csv(self._parse_file_location) 7439 credentials = self._parse_credentials() 7440 7441 self._match_text_seq("WITH") 7442 7443 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7444 7445 # Fallback case 7446 if self._curr: 7447 return self._parse_as_command(start) 7448 7449 return self.expression( 7450 exp.Copy, 7451 this=this, 7452 kind=kind, 7453 credentials=credentials, 7454 files=files, 7455 params=params, 7456 ) 7457 7458 def _parse_normalize(self) -> exp.Normalize: 7459 return self.expression( 7460 exp.Normalize, 7461 this=self._parse_bitwise(), 7462 form=self._match(TokenType.COMMA) and self._parse_var(), 7463 ) 7464 7465 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7466 if self._match_text_seq("COLUMNS", "(", advance=False): 7467 this = self._parse_function() 7468 if isinstance(this, exp.Columns): 7469 this.set("unpack", True) 7470 return this 7471 7472 return self.expression( 7473 exp.Star, 7474 **{ # type: ignore 7475 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7476 "replace": self._parse_star_op("REPLACE"), 7477 "rename": self._parse_star_op("RENAME"), 7478 }, 7479 ) 7480 7481 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7482 privilege_parts = [] 7483 7484 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7485 # (end of privilege list) or L_PAREN (start of column list) are met 7486 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7487 privilege_parts.append(self._curr.text.upper()) 7488 self._advance() 7489 7490 this = exp.var(" ".join(privilege_parts)) 7491 expressions = ( 7492 self._parse_wrapped_csv(self._parse_column) 7493 if self._match(TokenType.L_PAREN, advance=False) 7494 else None 7495 ) 7496 7497 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7498 7499 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7500 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7501 principal = self._parse_id_var() 7502 7503 if not principal: 7504 return None 7505 7506 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7507 7508 def _parse_grant(self) -> exp.Grant | exp.Command: 7509 start = self._prev 7510 7511 privileges = self._parse_csv(self._parse_grant_privilege) 7512 7513 self._match(TokenType.ON) 7514 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7515 7516 # Attempt to parse the securable e.g. MySQL allows names 7517 # such as "foo.*", "*.*" which are not easily parseable yet 7518 securable = self._try_parse(self._parse_table_parts) 7519 7520 if not securable or not self._match_text_seq("TO"): 7521 return self._parse_as_command(start) 7522 7523 principals = self._parse_csv(self._parse_grant_principal) 7524 7525 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7526 7527 if self._curr: 7528 return self._parse_as_command(start) 7529 7530 return self.expression( 7531 exp.Grant, 7532 privileges=privileges, 7533 kind=kind, 7534 securable=securable, 7535 principals=principals, 7536 grant_option=grant_option, 7537 ) 7538 7539 def _parse_overlay(self) -> exp.Overlay: 7540 return self.expression( 7541 exp.Overlay, 7542 **{ # type: ignore 7543 "this": self._parse_bitwise(), 7544 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7545 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7546 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7547 }, 7548 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1379 def __init__( 1380 self, 1381 error_level: t.Optional[ErrorLevel] = None, 1382 error_message_context: int = 100, 1383 max_errors: int = 3, 1384 dialect: DialectType = None, 1385 ): 1386 from sqlglot.dialects import Dialect 1387 1388 self.error_level = error_level or ErrorLevel.IMMEDIATE 1389 self.error_message_context = error_message_context 1390 self.max_errors = max_errors 1391 self.dialect = Dialect.get_or_raise(dialect) 1392 self.reset()
1404 def parse( 1405 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1406 ) -> t.List[t.Optional[exp.Expression]]: 1407 """ 1408 Parses a list of tokens and returns a list of syntax trees, one tree 1409 per parsed SQL statement. 1410 1411 Args: 1412 raw_tokens: The list of tokens. 1413 sql: The original SQL string, used to produce helpful debug messages. 1414 1415 Returns: 1416 The list of the produced syntax trees. 1417 """ 1418 return self._parse( 1419 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1420 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1422 def parse_into( 1423 self, 1424 expression_types: exp.IntoType, 1425 raw_tokens: t.List[Token], 1426 sql: t.Optional[str] = None, 1427 ) -> t.List[t.Optional[exp.Expression]]: 1428 """ 1429 Parses a list of tokens into a given Expression type. If a collection of Expression 1430 types is given instead, this method will try to parse the token list into each one 1431 of them, stopping at the first for which the parsing succeeds. 1432 1433 Args: 1434 expression_types: The expression type(s) to try and parse the token list into. 1435 raw_tokens: The list of tokens. 1436 sql: The original SQL string, used to produce helpful debug messages. 1437 1438 Returns: 1439 The target Expression. 1440 """ 1441 errors = [] 1442 for expression_type in ensure_list(expression_types): 1443 parser = self.EXPRESSION_PARSERS.get(expression_type) 1444 if not parser: 1445 raise TypeError(f"No parser registered for {expression_type}") 1446 1447 try: 1448 return self._parse(parser, raw_tokens, sql) 1449 except ParseError as e: 1450 e.errors[0]["into_expression"] = expression_type 1451 errors.append(e) 1452 1453 raise ParseError( 1454 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1455 errors=merge_errors(errors), 1456 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1496 def check_errors(self) -> None: 1497 """Logs or raises any found errors, depending on the chosen error level setting.""" 1498 if self.error_level == ErrorLevel.WARN: 1499 for error in self.errors: 1500 logger.error(str(error)) 1501 elif self.error_level == ErrorLevel.RAISE and self.errors: 1502 raise ParseError( 1503 concat_messages(self.errors, self.max_errors), 1504 errors=merge_errors(self.errors), 1505 )
Logs or raises any found errors, depending on the chosen error level setting.
1507 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1508 """ 1509 Appends an error in the list of recorded errors or raises it, depending on the chosen 1510 error level setting. 1511 """ 1512 token = token or self._curr or self._prev or Token.string("") 1513 start = token.start 1514 end = token.end + 1 1515 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1516 highlight = self.sql[start:end] 1517 end_context = self.sql[end : end + self.error_message_context] 1518 1519 error = ParseError.new( 1520 f"{message}. Line {token.line}, Col: {token.col}.\n" 1521 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1522 description=message, 1523 line=token.line, 1524 col=token.col, 1525 start_context=start_context, 1526 highlight=highlight, 1527 end_context=end_context, 1528 ) 1529 1530 if self.error_level == ErrorLevel.IMMEDIATE: 1531 raise error 1532 1533 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1535 def expression( 1536 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1537 ) -> E: 1538 """ 1539 Creates a new, validated Expression. 1540 1541 Args: 1542 exp_class: The expression class to instantiate. 1543 comments: An optional list of comments to attach to the expression. 1544 kwargs: The arguments to set for the expression along with their respective values. 1545 1546 Returns: 1547 The target expression. 1548 """ 1549 instance = exp_class(**kwargs) 1550 instance.add_comments(comments) if comments else self._add_comments(instance) 1551 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1558 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1559 """ 1560 Validates an Expression, making sure that all its mandatory arguments are set. 1561 1562 Args: 1563 expression: The expression to validate. 1564 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1565 1566 Returns: 1567 The validated expression. 1568 """ 1569 if self.error_level != ErrorLevel.IGNORE: 1570 for error_message in expression.error_messages(args): 1571 self.raise_error(error_message) 1572 1573 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.