sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
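
# Illustrative example (editorial addition, not part of the original source):
# the wrapping above preserves precedence once Mod is rendered as `%`. With the
# default dialect one would expect roughly:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("MOD(a + 1, 7)").sql()
#   '(a + 1) % 7'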


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
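
# Illustrative note (editorial addition, not part of the original source): the
# metaclass splits each registered key on spaces, so a dialect that populates
# SHOW_PARSERS with a key like "TABLES" ends up with a SHOW_TRIE that in_trie
# can walk one word at a time while matching a multi-word command such as
# SHOW TABLES.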


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
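
    # Illustrative note (editorial addition, not part of the original source):
    # FUNCTIONS maps an upper-cased function name to a builder that receives the
    # parsed argument list (plus the active dialect, for two-argument lambdas).
    # For instance, LOG(2, 8) is routed through build_logarithm, which swaps
    # base and expression when the dialect sets LOG_BASE_FIRST to False.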

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
        TokenType.SINK,
        TokenType.SOURCE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
"CHECK": lambda self: self.expression( 1012 exp.CheckColumnConstraint, 1013 this=self._parse_wrapped(self._parse_assignment), 1014 enforced=self._match_text_seq("ENFORCED"), 1015 ), 1016 "COLLATE": lambda self: self.expression( 1017 exp.CollateColumnConstraint, 1018 this=self._parse_identifier() or self._parse_column(), 1019 ), 1020 "COMMENT": lambda self: self.expression( 1021 exp.CommentColumnConstraint, this=self._parse_string() 1022 ), 1023 "COMPRESS": lambda self: self._parse_compress(), 1024 "CLUSTERED": lambda self: self.expression( 1025 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1026 ), 1027 "NONCLUSTERED": lambda self: self.expression( 1028 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1029 ), 1030 "DEFAULT": lambda self: self.expression( 1031 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1032 ), 1033 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1034 "EPHEMERAL": lambda self: self.expression( 1035 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1036 ), 1037 "EXCLUDE": lambda self: self.expression( 1038 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1039 ), 1040 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1041 "FORMAT": lambda self: self.expression( 1042 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1043 ), 1044 "GENERATED": lambda self: self._parse_generated_as_identity(), 1045 "IDENTITY": lambda self: self._parse_auto_increment(), 1046 "INLINE": lambda self: self._parse_inline(), 1047 "LIKE": lambda self: self._parse_create_like(), 1048 "NOT": lambda self: self._parse_not_constraint(), 1049 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1050 "ON": lambda self: ( 1051 self._match(TokenType.UPDATE) 1052 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1053 ) 1054 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1055 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1056 "PERIOD": lambda self: self._parse_period_for_system_time(), 1057 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1058 "REFERENCES": lambda self: self._parse_references(match=False), 1059 "TITLE": lambda self: self.expression( 1060 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1061 ), 1062 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1063 "UNIQUE": lambda self: self._parse_unique(), 1064 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1065 "WATERMARK": lambda self: self.expression( 1066 exp.WatermarkColumnConstraint, 1067 this=self._match(TokenType.FOR) and self._parse_column(), 1068 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1069 ), 1070 "WITH": lambda self: self.expression( 1071 exp.Properties, expressions=self._parse_wrapped_properties() 1072 ), 1073 } 1074 1075 ALTER_PARSERS = { 1076 "ADD": lambda self: self._parse_alter_table_add(), 1077 "AS": lambda self: self._parse_select(), 1078 "ALTER": lambda self: self._parse_alter_table_alter(), 1079 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1080 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1081 "DROP": lambda self: self._parse_alter_table_drop(), 1082 "RENAME": lambda self: self._parse_alter_table_rename(), 1083 "SET": lambda self: self._parse_alter_table_set(), 1084 "SWAP": lambda self: 
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
"cluster", 1166 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1167 ), 1168 TokenType.DISTRIBUTE_BY: lambda self: ( 1169 "distribute", 1170 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1171 ), 1172 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1173 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1174 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1175 } 1176 1177 SET_PARSERS = { 1178 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1179 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1180 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1181 "TRANSACTION": lambda self: self._parse_set_transaction(), 1182 } 1183 1184 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1185 1186 TYPE_LITERAL_PARSERS = { 1187 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1188 } 1189 1190 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1191 1192 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1193 1194 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1195 1196 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1197 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1198 "ISOLATION": ( 1199 ("LEVEL", "REPEATABLE", "READ"), 1200 ("LEVEL", "READ", "COMMITTED"), 1201 ("LEVEL", "READ", "UNCOMITTED"), 1202 ("LEVEL", "SERIALIZABLE"), 1203 ), 1204 "READ": ("WRITE", "ONLY"), 1205 } 1206 1207 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1208 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1209 ) 1210 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1211 1212 CREATE_SEQUENCE: OPTIONS_TYPE = { 1213 "SCALE": ("EXTEND", "NOEXTEND"), 1214 "SHARD": ("EXTEND", "NOEXTEND"), 1215 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1216 **dict.fromkeys( 1217 ( 1218 "SESSION", 1219 "GLOBAL", 1220 "KEEP", 1221 "NOKEEP", 1222 "ORDER", 1223 "NOORDER", 1224 "NOCACHE", 1225 "CYCLE", 1226 "NOCYCLE", 1227 "NOMINVALUE", 1228 "NOMAXVALUE", 1229 "NOSCALE", 1230 "NOSHARD", 1231 ), 1232 tuple(), 1233 ), 1234 } 1235 1236 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1237 1238 USABLES: OPTIONS_TYPE = dict.fromkeys( 1239 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1240 ) 1241 1242 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1243 1244 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1245 "TYPE": ("EVOLUTION",), 1246 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1247 } 1248 1249 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1250 1251 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1252 1253 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1254 "NOT": ("ENFORCED",), 1255 "MATCH": ( 1256 "FULL", 1257 "PARTIAL", 1258 "SIMPLE", 1259 ), 1260 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1261 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1262 } 1263 1264 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1265 1266 CLONE_KEYWORDS = {"CLONE", "COPY"} 1267 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1268 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1269 1270 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1271 1272 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1273 1274 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, 

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
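
    # Usage sketch (editorial addition, not part of the original source; assumes
    # the default dialect):
    #
    #   from sqlglot.tokens import Tokenizer
    #   from sqlglot.parser import Parser
    #
    #   sql = "SELECT 1; SELECT 2"
    #   tokens = Tokenizer().tokenize(sql)
    #   for tree in Parser().parse(tokens, sql=sql):
    #       print(tree.sql())  # one syntax tree per semicolon-separated statement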

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
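
    # Illustrative sketch (editorial addition, not part of the original source):
    # parse_into looks the target type up in EXPRESSION_PARSERS, so e.g. a
    # standalone WHERE clause can be parsed directly into an exp.Where node:
    #
    #   tokens = Tokenizer().tokenize("WHERE x > 1")
    #   where = Parser().parse_into(exp.Where, tokens, sql="WHERE x > 1")[0]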

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
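
    # Illustrative note (editorial addition, not part of the original source):
    # expression() funnels every new node through validate_expression(), so a
    # construction that misses a mandatory argument (say, exp.Not without
    # `this`) is reported via raise_error() unless error_level is
    # ErrorLevel.IGNORE.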

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
1746 1747 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1748 start = self._prev 1749 temporary = self._match(TokenType.TEMPORARY) 1750 materialized = self._match_text_seq("MATERIALIZED") 1751 1752 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1753 if not kind: 1754 return self._parse_as_command(start) 1755 1756 concurrently = self._match_text_seq("CONCURRENTLY") 1757 if_exists = exists or self._parse_exists() 1758 1759 if kind == "COLUMN": 1760 this = self._parse_column() 1761 else: 1762 this = self._parse_table_parts( 1763 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1764 ) 1765 1766 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1767 1768 if self._match(TokenType.L_PAREN, advance=False): 1769 expressions = self._parse_wrapped_csv(self._parse_types) 1770 else: 1771 expressions = None 1772 1773 return self.expression( 1774 exp.Drop, 1775 exists=if_exists, 1776 this=this, 1777 expressions=expressions, 1778 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1779 temporary=temporary, 1780 materialized=materialized, 1781 cascade=self._match_text_seq("CASCADE"), 1782 constraints=self._match_text_seq("CONSTRAINTS"), 1783 purge=self._match_text_seq("PURGE"), 1784 cluster=cluster, 1785 concurrently=concurrently, 1786 ) 1787 1788 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1789 return ( 1790 self._match_text_seq("IF") 1791 and (not not_ or self._match(TokenType.NOT)) 1792 and self._match(TokenType.EXISTS) 1793 ) 1794 1795 def _parse_create(self) -> exp.Create | exp.Command: 1796 # Note: this can't be None because we've matched a statement parser 1797 start = self._prev 1798 1799 replace = ( 1800 start.token_type == TokenType.REPLACE 1801 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1802 or self._match_pair(TokenType.OR, TokenType.ALTER) 1803 ) 1804 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1805 1806 unique = self._match(TokenType.UNIQUE) 1807 1808 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1809 clustered = True 1810 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1811 "COLUMNSTORE" 1812 ): 1813 clustered = False 1814 else: 1815 clustered = None 1816 1817 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1818 self._advance() 1819 1820 properties = None 1821 create_token = self._match_set(self.CREATABLES) and self._prev 1822 1823 if not create_token: 1824 # exp.Properties.Location.POST_CREATE 1825 properties = self._parse_properties() 1826 create_token = self._match_set(self.CREATABLES) and self._prev 1827 1828 if not properties or not create_token: 1829 return self._parse_as_command(start) 1830 1831 concurrently = self._match_text_seq("CONCURRENTLY") 1832 exists = self._parse_exists(not_=True) 1833 this = None 1834 expression: t.Optional[exp.Expression] = None 1835 indexes = None 1836 no_schema_binding = None 1837 begin = None 1838 end = None 1839 clone = None 1840 1841 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1842 nonlocal properties 1843 if properties and temp_props: 1844 properties.expressions.extend(temp_props.expressions) 1845 elif temp_props: 1846 properties = temp_props 1847 1848 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1849 this = self._parse_user_defined_function(kind=create_token.token_type) 1850 1851 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1852 
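            # Illustrative note (added; assumes the base PROPERTY_PARSERS): for
            # CREATE FUNCTION f(x INT) RETURNS INT LANGUAGE SQL AS ..., both the
            # RETURNS clause and the LANGUAGE option are consumed here as
            # post-signature properties.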
extend_props(self._parse_properties()) 1853 1854 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1855 extend_props(self._parse_properties()) 1856 1857 if not expression: 1858 if self._match(TokenType.COMMAND): 1859 expression = self._parse_as_command(self._prev) 1860 else: 1861 begin = self._match(TokenType.BEGIN) 1862 return_ = self._match_text_seq("RETURN") 1863 1864 if self._match(TokenType.STRING, advance=False): 1865 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1866 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1867 expression = self._parse_string() 1868 extend_props(self._parse_properties()) 1869 else: 1870 expression = self._parse_user_defined_function_expression() 1871 1872 end = self._match_text_seq("END") 1873 1874 if return_: 1875 expression = self.expression(exp.Return, this=expression) 1876 elif create_token.token_type == TokenType.INDEX: 1877 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1878 if not self._match(TokenType.ON): 1879 index = self._parse_id_var() 1880 anonymous = False 1881 else: 1882 index = None 1883 anonymous = True 1884 1885 this = self._parse_index(index=index, anonymous=anonymous) 1886 elif create_token.token_type in self.DB_CREATABLES: 1887 table_parts = self._parse_table_parts( 1888 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1889 ) 1890 1891 # exp.Properties.Location.POST_NAME 1892 self._match(TokenType.COMMA) 1893 extend_props(self._parse_properties(before=True)) 1894 1895 this = self._parse_schema(this=table_parts) 1896 1897 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1898 extend_props(self._parse_properties()) 1899 1900 self._match(TokenType.ALIAS) 1901 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1902 # exp.Properties.Location.POST_ALIAS 1903 extend_props(self._parse_properties()) 1904 1905 if create_token.token_type == TokenType.SEQUENCE: 1906 expression = self._parse_types() 1907 extend_props(self._parse_properties()) 1908 else: 1909 expression = self._parse_ddl_select() 1910 1911 if create_token.token_type == TokenType.TABLE: 1912 # exp.Properties.Location.POST_EXPRESSION 1913 extend_props(self._parse_properties()) 1914 1915 indexes = [] 1916 while True: 1917 index = self._parse_index() 1918 1919 # exp.Properties.Location.POST_INDEX 1920 extend_props(self._parse_properties()) 1921 if not index: 1922 break 1923 else: 1924 self._match(TokenType.COMMA) 1925 indexes.append(index) 1926 elif create_token.token_type == TokenType.VIEW: 1927 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1928 no_schema_binding = True 1929 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1930 extend_props(self._parse_properties()) 1931 1932 shallow = self._match_text_seq("SHALLOW") 1933 1934 if self._match_texts(self.CLONE_KEYWORDS): 1935 copy = self._prev.text.lower() == "copy" 1936 clone = self.expression( 1937 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1938 ) 1939 1940 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1941 return self._parse_as_command(start) 1942 1943 create_kind_text = create_token.text.upper() 1944 return self.expression( 1945 exp.Create, 1946 this=this, 1947 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1948 replace=replace, 1949 refresh=refresh, 1950 unique=unique, 1951 expression=expression,
1952 exists=exists, 1953 properties=properties, 1954 indexes=indexes, 1955 no_schema_binding=no_schema_binding, 1956 begin=begin, 1957 end=end, 1958 clone=clone, 1959 concurrently=concurrently, 1960 clustered=clustered, 1961 ) 1962 1963 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1964 seq = exp.SequenceProperties() 1965 1966 options = [] 1967 index = self._index 1968 1969 while self._curr: 1970 self._match(TokenType.COMMA) 1971 if self._match_text_seq("INCREMENT"): 1972 self._match_text_seq("BY") 1973 self._match_text_seq("=") 1974 seq.set("increment", self._parse_term()) 1975 elif self._match_text_seq("MINVALUE"): 1976 seq.set("minvalue", self._parse_term()) 1977 elif self._match_text_seq("MAXVALUE"): 1978 seq.set("maxvalue", self._parse_term()) 1979 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1980 self._match_text_seq("=") 1981 seq.set("start", self._parse_term()) 1982 elif self._match_text_seq("CACHE"): 1983 # T-SQL allows empty CACHE which is initialized dynamically 1984 seq.set("cache", self._parse_number() or True) 1985 elif self._match_text_seq("OWNED", "BY"): 1986 # "OWNED BY NONE" is the default 1987 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1988 else: 1989 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1990 if opt: 1991 options.append(opt) 1992 else: 1993 break 1994 1995 seq.set("options", options if options else None) 1996 return None if self._index == index else seq 1997 1998 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1999 # only used for teradata currently 2000 self._match(TokenType.COMMA) 2001 2002 kwargs = { 2003 "no": self._match_text_seq("NO"), 2004 "dual": self._match_text_seq("DUAL"), 2005 "before": self._match_text_seq("BEFORE"), 2006 "default": self._match_text_seq("DEFAULT"), 2007 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2008 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2009 "after": self._match_text_seq("AFTER"), 2010 "minimum": self._match_texts(("MIN", "MINIMUM")), 2011 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2012 } 2013 2014 if self._match_texts(self.PROPERTY_PARSERS): 2015 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2016 try: 2017 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2018 except TypeError: 2019 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2020 2021 return None 2022 2023 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2024 return self._parse_wrapped_csv(self._parse_property) 2025 2026 def _parse_property(self) -> t.Optional[exp.Expression]: 2027 if self._match_texts(self.PROPERTY_PARSERS): 2028 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2029 2030 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2031 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2032 2033 if self._match_text_seq("COMPOUND", "SORTKEY"): 2034 return self._parse_sortkey(compound=True) 2035 2036 if self._match_text_seq("SQL", "SECURITY"): 2037 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2038 2039 index = self._index 2040 key = self._parse_column() 2041 2042 if not self._match(TokenType.EQ): 2043 self._retreat(index) 2044 return self._parse_sequence_properties() 2045 2046 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2047 if isinstance(key, exp.Column): 2048 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2049 2050 value = self._parse_bitwise() or self._parse_var(any_token=True) 2051 2052 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2053 if isinstance(value, exp.Column): 2054 value = exp.var(value.name) 2055 2056 return self.expression(exp.Property, this=key, value=value) 2057 2058 def _parse_stored(self) -> exp.FileFormatProperty: 2059 self._match(TokenType.ALIAS) 2060 2061 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2062 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2063 2064 return self.expression( 2065 exp.FileFormatProperty, 2066 this=( 2067 self.expression( 2068 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2069 ) 2070 if input_format or output_format 2071 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2072 ), 2073 ) 2074 2075 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2076 field = self._parse_field() 2077 if isinstance(field, exp.Identifier) and not field.quoted: 2078 field = exp.var(field) 2079 2080 return field 2081 2082 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2083 self._match(TokenType.EQ) 2084 self._match(TokenType.ALIAS) 2085 2086 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2087 2088 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2089 properties = [] 2090 while True: 2091 if before: 2092 prop = self._parse_property_before() 2093 else: 2094 prop = self._parse_property() 2095 if not prop: 2096 break 2097 for p in ensure_list(prop): 2098 properties.append(p) 2099 2100 if properties: 2101 return self.expression(exp.Properties, expressions=properties) 2102 2103 return None 2104 2105 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2106 return self.expression( 2107 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2108 ) 2109 2110 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2111 if self._match_texts(("DEFINER", "INVOKER")): 2112 security_specifier = self._prev.text.upper() 2113 return self.expression(exp.SecurityProperty, this=security_specifier) 2114 return None 2115 2116 def _parse_settings_property(self) -> exp.SettingsProperty: 2117 return self.expression( 2118 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2119 ) 2120 2121 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2122 if self._index >= 2: 2123 pre_volatile_token = self._tokens[self._index - 2] 2124 else: 2125 pre_volatile_token = None 2126 2127 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2128 return exp.VolatileProperty() 2129 2130 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2131 2132 def _parse_retention_period(self) -> exp.Var: 2133 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2134 number = self._parse_number() 2135 number_str = f"{number} " if number else "" 2136 unit = self._parse_var(any_token=True) 2137 return exp.var(f"{number_str}{unit}") 2138 2139 def _parse_system_versioning_property( 2140 self, with_: bool = False 2141 ) -> exp.WithSystemVersioningProperty: 2142 self._match(TokenType.EQ) 2143 prop = self.expression( 2144 exp.WithSystemVersioningProperty, 2145 **{ # type: ignore 2146 "on": 
True, 2147 "with": with_, 2148 }, 2149 ) 2150 2151 if self._match_text_seq("OFF"): 2152 prop.set("on", False) 2153 return prop 2154 2155 self._match(TokenType.ON) 2156 if self._match(TokenType.L_PAREN): 2157 while self._curr and not self._match(TokenType.R_PAREN): 2158 if self._match_text_seq("HISTORY_TABLE", "="): 2159 prop.set("this", self._parse_table_parts()) 2160 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2161 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2162 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2163 prop.set("retention_period", self._parse_retention_period()) 2164 2165 self._match(TokenType.COMMA) 2166 2167 return prop 2168 2169 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2170 self._match(TokenType.EQ) 2171 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2172 prop = self.expression(exp.DataDeletionProperty, on=on) 2173 2174 if self._match(TokenType.L_PAREN): 2175 while self._curr and not self._match(TokenType.R_PAREN): 2176 if self._match_text_seq("FILTER_COLUMN", "="): 2177 prop.set("filter_column", self._parse_column()) 2178 elif self._match_text_seq("RETENTION_PERIOD", "="): 2179 prop.set("retention_period", self._parse_retention_period()) 2180 2181 self._match(TokenType.COMMA) 2182 2183 return prop 2184 2185 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2186 kind = "HASH" 2187 expressions: t.Optional[t.List[exp.Expression]] = None 2188 if self._match_text_seq("BY", "HASH"): 2189 expressions = self._parse_wrapped_csv(self._parse_id_var) 2190 elif self._match_text_seq("BY", "RANDOM"): 2191 kind = "RANDOM" 2192 2193 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2194 buckets: t.Optional[exp.Expression] = None 2195 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2196 buckets = self._parse_number() 2197 2198 return self.expression( 2199 exp.DistributedByProperty, 2200 expressions=expressions, 2201 kind=kind, 2202 buckets=buckets, 2203 order=self._parse_order(), 2204 ) 2205 2206 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2207 self._match_text_seq("KEY") 2208 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2209 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2210 2211 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2212 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2213 prop = self._parse_system_versioning_property(with_=True) 2214 self._match_r_paren() 2215 return prop 2216 2217 if self._match(TokenType.L_PAREN, advance=False): 2218 return self._parse_wrapped_properties() 2219 2220 if self._match_text_seq("JOURNAL"): 2221 return self._parse_withjournaltable() 2222 2223 if self._match_texts(self.VIEW_ATTRIBUTES): 2224 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2225 2226 if self._match_text_seq("DATA"): 2227 return self._parse_withdata(no=False) 2228 elif self._match_text_seq("NO", "DATA"): 2229 return self._parse_withdata(no=True) 2230 2231 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2232 return self._parse_serde_properties(with_=True) 2233 2234 if self._match(TokenType.SCHEMA): 2235 return self.expression( 2236 exp.WithSchemaBindingProperty, 2237 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2238 ) 2239 2240 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2241 return self.expression( 2242 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2243 ) 2244 2245 if not self._next: 2246 return None 2247 2248 return self._parse_withisolatedloading() 2249 2250 def _parse_procedure_option(self) -> exp.Expression | None: 2251 if self._match_text_seq("EXECUTE", "AS"): 2252 return self.expression( 2253 exp.ExecuteAsProperty, 2254 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2255 or self._parse_string(), 2256 ) 2257 2258 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2259 2260 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2261 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2262 self._match(TokenType.EQ) 2263 2264 user = self._parse_id_var() 2265 self._match(TokenType.PARAMETER) 2266 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2267 2268 if not user or not host: 2269 return None 2270 2271 return exp.DefinerProperty(this=f"{user}@{host}") 2272 2273 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2274 self._match(TokenType.TABLE) 2275 self._match(TokenType.EQ) 2276 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2277 2278 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2279 return self.expression(exp.LogProperty, no=no) 2280 2281 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2282 return self.expression(exp.JournalProperty, **kwargs) 2283 2284 def _parse_checksum(self) -> exp.ChecksumProperty: 2285 self._match(TokenType.EQ) 2286 2287 on = None 2288 if self._match(TokenType.ON): 2289 on = True 2290 elif self._match_text_seq("OFF"): 2291 on = False 2292 2293 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2294 2295 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2296 return self.expression( 2297 exp.Cluster, 2298 expressions=( 2299 self._parse_wrapped_csv(self._parse_ordered) 2300 if wrapped 2301 else self._parse_csv(self._parse_ordered) 2302 ), 2303 ) 2304 2305 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2306 self._match_text_seq("BY") 2307 2308 self._match_l_paren() 2309 expressions = self._parse_csv(self._parse_column) 2310 self._match_r_paren() 2311 2312 if self._match_text_seq("SORTED", "BY"): 2313 self._match_l_paren() 2314 sorted_by = self._parse_csv(self._parse_ordered) 2315 self._match_r_paren() 2316 else: 2317 sorted_by = None 2318 2319 self._match(TokenType.INTO) 2320 buckets = self._parse_number() 2321 self._match_text_seq("BUCKETS") 2322 2323 return self.expression( 2324 exp.ClusteredByProperty, 2325 expressions=expressions, 2326 sorted_by=sorted_by, 2327 buckets=buckets, 2328 ) 2329 2330 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2331 if not self._match_text_seq("GRANTS"): 2332 self._retreat(self._index - 1) 2333 return None 2334 2335 return self.expression(exp.CopyGrantsProperty) 2336 2337 def _parse_freespace(self) -> exp.FreespaceProperty: 2338 self._match(TokenType.EQ) 2339 return self.expression( 2340 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2341 ) 2342 2343 def _parse_mergeblockratio( 2344 self, no: bool = False, default: bool = False 2345 ) -> exp.MergeBlockRatioProperty: 2346 if self._match(TokenType.EQ): 2347 return self.expression( 2348 exp.MergeBlockRatioProperty, 2349 this=self._parse_number(), 2350 percent=self._match(TokenType.PERCENT), 2351 ) 2352 2353 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2354 2355 def _parse_datablocksize( 2356 self, 2357 default: t.Optional[bool] = None, 2358 minimum: t.Optional[bool] = None, 2359 maximum: t.Optional[bool] = None, 2360 ) -> exp.DataBlocksizeProperty: 2361 self._match(TokenType.EQ) 2362 size = self._parse_number() 2363 2364 units = None 2365 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2366 units = self._prev.text 2367 2368 return self.expression( 2369 exp.DataBlocksizeProperty, 2370 size=size, 2371 units=units, 2372 default=default, 2373 minimum=minimum, 2374 maximum=maximum, 2375 ) 2376 2377 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2378 self._match(TokenType.EQ) 2379 always = self._match_text_seq("ALWAYS") 2380 manual = self._match_text_seq("MANUAL") 2381 never = self._match_text_seq("NEVER") 2382 default = self._match_text_seq("DEFAULT") 2383 2384 autotemp = None 2385 if self._match_text_seq("AUTOTEMP"): 2386 autotemp = self._parse_schema() 2387 2388 return self.expression( 2389 exp.BlockCompressionProperty, 2390 always=always, 2391 manual=manual, 2392 never=never, 2393 default=default, 2394 autotemp=autotemp, 2395 ) 2396 2397 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2398 index = self._index 2399 no = self._match_text_seq("NO") 2400 concurrent = self._match_text_seq("CONCURRENT") 2401 2402 if not self._match_text_seq("ISOLATED", "LOADING"): 2403 self._retreat(index) 2404 return None 2405 2406 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2407 return self.expression( 2408 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2409 ) 2410 2411 def _parse_locking(self) -> exp.LockingProperty: 2412 if self._match(TokenType.TABLE): 2413 kind = "TABLE" 2414 elif self._match(TokenType.VIEW): 2415 kind = "VIEW" 2416 elif self._match(TokenType.ROW): 2417 kind = "ROW" 2418 elif self._match_text_seq("DATABASE"): 2419 kind = "DATABASE" 2420 else: 2421 kind = None 2422 2423 if kind in ("DATABASE", "TABLE", "VIEW"): 2424 this = self._parse_table_parts() 2425 else: 2426 this = None 2427 2428 if self._match(TokenType.FOR): 2429 for_or_in = "FOR" 2430 elif self._match(TokenType.IN): 2431 for_or_in = "IN" 2432 else: 2433 for_or_in = None 2434 2435 if self._match_text_seq("ACCESS"): 2436 lock_type = "ACCESS" 2437 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2438 lock_type = "EXCLUSIVE" 2439 elif self._match_text_seq("SHARE"): 2440 lock_type = "SHARE" 2441 elif self._match_text_seq("READ"): 2442 lock_type = "READ" 2443 elif self._match_text_seq("WRITE"): 2444 lock_type = "WRITE" 2445 elif self._match_text_seq("CHECKSUM"): 2446 lock_type = "CHECKSUM" 2447 else: 2448 lock_type = None 2449 2450 override = self._match_text_seq("OVERRIDE") 2451 2452 return self.expression( 2453 exp.LockingProperty, 2454 this=this, 2455 kind=kind, 2456 for_or_in=for_or_in, 2457 lock_type=lock_type, 2458 override=override, 2459 ) 2460 2461 def _parse_partition_by(self) -> t.List[exp.Expression]: 2462 if self._match(TokenType.PARTITION_BY): 2463 return self._parse_csv(self._parse_assignment) 2464 return [] 2465 2466 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2467 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2468 if self._match_text_seq("MINVALUE"): 2469 return exp.var("MINVALUE") 2470 if self._match_text_seq("MAXVALUE"): 2471 return exp.var("MAXVALUE") 2472 return self._parse_bitwise() 2473 2474 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2475 expression = None 
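        # Sketch of the Postgres bound forms handled below (added note):
        # FOR VALUES IN (...) fills `this` with a list; FOR VALUES FROM (...)
        # TO (...) fills from_expressions/to_expressions; and
        # WITH (MODULUS m, REMAINDER r) fills `this` (modulus) and
        # `expression` (remainder).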
2476 from_expressions = None 2477 to_expressions = None 2478 2479 if self._match(TokenType.IN): 2480 this = self._parse_wrapped_csv(self._parse_bitwise) 2481 elif self._match(TokenType.FROM): 2482 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2483 self._match_text_seq("TO") 2484 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2485 elif self._match_text_seq("WITH", "(", "MODULUS"): 2486 this = self._parse_number() 2487 self._match_text_seq(",", "REMAINDER") 2488 expression = self._parse_number() 2489 self._match_r_paren() 2490 else: 2491 self.raise_error("Failed to parse partition bound spec.") 2492 2493 return self.expression( 2494 exp.PartitionBoundSpec, 2495 this=this, 2496 expression=expression, 2497 from_expressions=from_expressions, 2498 to_expressions=to_expressions, 2499 ) 2500 2501 # https://www.postgresql.org/docs/current/sql-createtable.html 2502 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2503 if not self._match_text_seq("OF"): 2504 self._retreat(self._index - 1) 2505 return None 2506 2507 this = self._parse_table(schema=True) 2508 2509 if self._match(TokenType.DEFAULT): 2510 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2511 elif self._match_text_seq("FOR", "VALUES"): 2512 expression = self._parse_partition_bound_spec() 2513 else: 2514 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2515 2516 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2517 2518 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2519 self._match(TokenType.EQ) 2520 return self.expression( 2521 exp.PartitionedByProperty, 2522 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2523 ) 2524 2525 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2526 if self._match_text_seq("AND", "STATISTICS"): 2527 statistics = True 2528 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2529 statistics = False 2530 else: 2531 statistics = None 2532 2533 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2534 2535 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2536 if self._match_text_seq("SQL"): 2537 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2538 return None 2539 2540 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2541 if self._match_text_seq("SQL", "DATA"): 2542 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2543 return None 2544 2545 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2546 if self._match_text_seq("PRIMARY", "INDEX"): 2547 return exp.NoPrimaryIndexProperty() 2548 if self._match_text_seq("SQL"): 2549 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2550 return None 2551 2552 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2553 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2554 return exp.OnCommitProperty() 2555 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2556 return exp.OnCommitProperty(delete=True) 2557 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2558 2559 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2560 if self._match_text_seq("SQL", "DATA"): 2561 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2562 return None 2563 2564 def _parse_distkey(self) -> exp.DistKeyProperty: 2565 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2566 2567 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2568 table = self._parse_table(schema=True) 2569 2570 options = [] 2571 while self._match_texts(("INCLUDING", "EXCLUDING")): 2572 this = self._prev.text.upper() 2573 2574 id_var = self._parse_id_var() 2575 if not id_var: 2576 return None 2577 2578 options.append( 2579 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2580 ) 2581 2582 return self.expression(exp.LikeProperty, this=table, expressions=options) 2583 2584 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2585 return self.expression( 2586 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2587 ) 2588 2589 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2590 self._match(TokenType.EQ) 2591 return self.expression( 2592 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2593 ) 2594 2595 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2596 self._match_text_seq("WITH", "CONNECTION") 2597 return self.expression( 2598 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2599 ) 2600 2601 def _parse_returns(self) -> exp.ReturnsProperty: 2602 value: t.Optional[exp.Expression] 2603 null = None 2604 is_table = self._match(TokenType.TABLE) 2605 2606 if is_table: 2607 if self._match(TokenType.LT): 2608 value = self.expression( 2609 exp.Schema, 2610 this="TABLE", 2611 expressions=self._parse_csv(self._parse_struct_types), 2612 ) 2613 if not self._match(TokenType.GT): 2614 self.raise_error("Expecting >") 2615 else: 2616 value = self._parse_schema(exp.var("TABLE")) 2617 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2618 null = True 2619 value = None 2620 else: 2621 value = self._parse_types() 2622 2623 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2624 2625 def _parse_describe(self) -> exp.Describe: 2626 kind = self._match_set(self.CREATABLES) and self._prev.text 2627 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2628 if self._match(TokenType.DOT): 2629 style = None 2630 self._retreat(self._index - 2) 2631 2632 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2633 2634 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2635 this = self._parse_statement() 2636 else: 2637 this = self._parse_table(schema=True) 2638 2639 properties = self._parse_properties() 2640 expressions = properties.expressions if properties else None 2641 partition = self._parse_partition() 2642 return self.expression( 2643 exp.Describe, 2644 this=this, 2645 style=style, 2646 kind=kind, 2647 expressions=expressions, 2648 partition=partition, 2649 format=format, 2650 ) 2651 2652 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2653 kind = self._prev.text.upper() 2654 expressions = [] 2655 2656 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2657 if self._match(TokenType.WHEN): 2658 expression = self._parse_disjunction() 2659 self._match(TokenType.THEN) 2660 else: 2661 expression = None 2662 2663 else_ = self._match(TokenType.ELSE) 2664 2665 if not self._match(TokenType.INTO): 2666 return None 2667 2668 return self.expression( 2669 exp.ConditionalInsert, 2670 this=self.expression( 2671 exp.Insert, 2672 this=self._parse_table(schema=True), 2673 
expression=self._parse_derived_table_values(), 2674 ), 2675 expression=expression, 2676 else_=else_, 2677 ) 2678 2679 expression = parse_conditional_insert() 2680 while expression is not None: 2681 expressions.append(expression) 2682 expression = parse_conditional_insert() 2683 2684 return self.expression( 2685 exp.MultitableInserts, 2686 kind=kind, 2687 comments=comments, 2688 expressions=expressions, 2689 source=self._parse_table(), 2690 ) 2691 2692 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2693 comments = [] 2694 hint = self._parse_hint() 2695 overwrite = self._match(TokenType.OVERWRITE) 2696 ignore = self._match(TokenType.IGNORE) 2697 local = self._match_text_seq("LOCAL") 2698 alternative = None 2699 is_function = None 2700 2701 if self._match_text_seq("DIRECTORY"): 2702 this: t.Optional[exp.Expression] = self.expression( 2703 exp.Directory, 2704 this=self._parse_var_or_string(), 2705 local=local, 2706 row_format=self._parse_row_format(match_row=True), 2707 ) 2708 else: 2709 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2710 comments += ensure_list(self._prev_comments) 2711 return self._parse_multitable_inserts(comments) 2712 2713 if self._match(TokenType.OR): 2714 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2715 2716 self._match(TokenType.INTO) 2717 comments += ensure_list(self._prev_comments) 2718 self._match(TokenType.TABLE) 2719 is_function = self._match(TokenType.FUNCTION) 2720 2721 this = ( 2722 self._parse_table(schema=True, parse_partition=True) 2723 if not is_function 2724 else self._parse_function() 2725 ) 2726 2727 returning = self._parse_returning() 2728 2729 return self.expression( 2730 exp.Insert, 2731 comments=comments, 2732 hint=hint, 2733 is_function=is_function, 2734 this=this, 2735 stored=self._match_text_seq("STORED") and self._parse_stored(), 2736 by_name=self._match_text_seq("BY", "NAME"), 2737 exists=self._parse_exists(), 2738 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2739 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2740 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2741 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2742 conflict=self._parse_on_conflict(), 2743 returning=returning or self._parse_returning(), 2744 overwrite=overwrite, 2745 alternative=alternative, 2746 ignore=ignore, 2747 source=self._match(TokenType.TABLE) and self._parse_table(), 2748 ) 2749 2750 def _parse_kill(self) -> exp.Kill: 2751 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2752 2753 return self.expression( 2754 exp.Kill, 2755 this=self._parse_primary(), 2756 kind=kind, 2757 ) 2758 2759 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2760 conflict = self._match_text_seq("ON", "CONFLICT") 2761 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2762 2763 if not conflict and not duplicate: 2764 return None 2765 2766 conflict_keys = None 2767 constraint = None 2768 2769 if conflict: 2770 if self._match_text_seq("ON", "CONSTRAINT"): 2771 constraint = self._parse_id_var() 2772 elif self._match(TokenType.L_PAREN): 2773 conflict_keys = self._parse_csv(self._parse_id_var) 2774 self._match_r_paren() 2775 2776 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2777 if self._prev.token_type == TokenType.UPDATE: 2778 self._match(TokenType.SET) 2779 expressions = self._parse_csv(self._parse_equality) 2780 else: 2781 
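            # Actions other than UPDATE (e.g. NOTHING) carry no SET assignments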
expressions = None 2782 2783 return self.expression( 2784 exp.OnConflict, 2785 duplicate=duplicate, 2786 expressions=expressions, 2787 action=action, 2788 conflict_keys=conflict_keys, 2789 constraint=constraint, 2790 ) 2791 2792 def _parse_returning(self) -> t.Optional[exp.Returning]: 2793 if not self._match(TokenType.RETURNING): 2794 return None 2795 return self.expression( 2796 exp.Returning, 2797 expressions=self._parse_csv(self._parse_expression), 2798 into=self._match(TokenType.INTO) and self._parse_table_part(), 2799 ) 2800 2801 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2802 if not self._match(TokenType.FORMAT): 2803 return None 2804 return self._parse_row_format() 2805 2806 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2807 index = self._index 2808 with_ = with_ or self._match_text_seq("WITH") 2809 2810 if not self._match(TokenType.SERDE_PROPERTIES): 2811 self._retreat(index) 2812 return None 2813 return self.expression( 2814 exp.SerdeProperties, 2815 **{ # type: ignore 2816 "expressions": self._parse_wrapped_properties(), 2817 "with": with_, 2818 }, 2819 ) 2820 2821 def _parse_row_format( 2822 self, match_row: bool = False 2823 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2824 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2825 return None 2826 2827 if self._match_text_seq("SERDE"): 2828 this = self._parse_string() 2829 2830 serde_properties = self._parse_serde_properties() 2831 2832 return self.expression( 2833 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2834 ) 2835 2836 self._match_text_seq("DELIMITED") 2837 2838 kwargs = {} 2839 2840 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2841 kwargs["fields"] = self._parse_string() 2842 if self._match_text_seq("ESCAPED", "BY"): 2843 kwargs["escaped"] = self._parse_string() 2844 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2845 kwargs["collection_items"] = self._parse_string() 2846 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2847 kwargs["map_keys"] = self._parse_string() 2848 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2849 kwargs["lines"] = self._parse_string() 2850 if self._match_text_seq("NULL", "DEFINED", "AS"): 2851 kwargs["null"] = self._parse_string() 2852 2853 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2854 2855 def _parse_load(self) -> exp.LoadData | exp.Command: 2856 if self._match_text_seq("DATA"): 2857 local = self._match_text_seq("LOCAL") 2858 self._match_text_seq("INPATH") 2859 inpath = self._parse_string() 2860 overwrite = self._match(TokenType.OVERWRITE) 2861 self._match_pair(TokenType.INTO, TokenType.TABLE) 2862 2863 return self.expression( 2864 exp.LoadData, 2865 this=self._parse_table(schema=True), 2866 local=local, 2867 overwrite=overwrite, 2868 inpath=inpath, 2869 partition=self._parse_partition(), 2870 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2871 serde=self._match_text_seq("SERDE") and self._parse_string(), 2872 ) 2873 return self._parse_as_command(self._prev) 2874 2875 def _parse_delete(self) -> exp.Delete: 2876 # This handles MySQL's "Multiple-Table Syntax" 2877 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2878 tables = None 2879 if not self._match(TokenType.FROM, advance=False): 2880 tables = self._parse_csv(self._parse_table) or None 2881 2882 returning = self._parse_returning() 2883 2884 
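        # Illustrative examples (added): MySQL's "DELETE t1, t2 FROM t1 JOIN t2 ..."
        # populates `tables` via the multiple-table branch above, whereas a plain
        # "DELETE FROM t WHERE ..." leaves `tables` as None and only sets
        # this/where below.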
return self.expression( 2885 exp.Delete, 2886 tables=tables, 2887 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2888 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2889 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2890 where=self._parse_where(), 2891 returning=returning or self._parse_returning(), 2892 limit=self._parse_limit(), 2893 ) 2894 2895 def _parse_update(self) -> exp.Update: 2896 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2897 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2898 returning = self._parse_returning() 2899 return self.expression( 2900 exp.Update, 2901 **{ # type: ignore 2902 "this": this, 2903 "expressions": expressions, 2904 "from": self._parse_from(joins=True), 2905 "where": self._parse_where(), 2906 "returning": returning or self._parse_returning(), 2907 "order": self._parse_order(), 2908 "limit": self._parse_limit(), 2909 }, 2910 ) 2911 2912 def _parse_uncache(self) -> exp.Uncache: 2913 if not self._match(TokenType.TABLE): 2914 self.raise_error("Expecting TABLE after UNCACHE") 2915 2916 return self.expression( 2917 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2918 ) 2919 2920 def _parse_cache(self) -> exp.Cache: 2921 lazy = self._match_text_seq("LAZY") 2922 self._match(TokenType.TABLE) 2923 table = self._parse_table(schema=True) 2924 2925 options = [] 2926 if self._match_text_seq("OPTIONS"): 2927 self._match_l_paren() 2928 k = self._parse_string() 2929 self._match(TokenType.EQ) 2930 v = self._parse_string() 2931 options = [k, v] 2932 self._match_r_paren() 2933 2934 self._match(TokenType.ALIAS) 2935 return self.expression( 2936 exp.Cache, 2937 this=table, 2938 lazy=lazy, 2939 options=options, 2940 expression=self._parse_select(nested=True), 2941 ) 2942 2943 def _parse_partition(self) -> t.Optional[exp.Partition]: 2944 if not self._match(TokenType.PARTITION): 2945 return None 2946 2947 return self.expression( 2948 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2949 ) 2950 2951 def _parse_value(self) -> t.Optional[exp.Tuple]: 2952 def _parse_value_expression() -> t.Optional[exp.Expression]: 2953 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 2954 return exp.var(self._prev.text.upper()) 2955 return self._parse_expression() 2956 2957 if self._match(TokenType.L_PAREN): 2958 expressions = self._parse_csv(_parse_value_expression) 2959 self._match_r_paren() 2960 return self.expression(exp.Tuple, expressions=expressions) 2961 2962 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
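        # (Added note) Each bare scalar becomes its own one-element Tuple here,
        # so the caller's CSV loop over _parse_value turns VALUES 1, 2 into two
        # single-column rows.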
2963 expression = self._parse_expression() 2964 if expression: 2965 return self.expression(exp.Tuple, expressions=[expression]) 2966 return None 2967 2968 def _parse_projections(self) -> t.List[exp.Expression]: 2969 return self._parse_expressions() 2970 2971 def _parse_select( 2972 self, 2973 nested: bool = False, 2974 table: bool = False, 2975 parse_subquery_alias: bool = True, 2976 parse_set_operation: bool = True, 2977 ) -> t.Optional[exp.Expression]: 2978 cte = self._parse_with() 2979 2980 if cte: 2981 this = self._parse_statement() 2982 2983 if not this: 2984 self.raise_error("Failed to parse any statement following CTE") 2985 return cte 2986 2987 if "with" in this.arg_types: 2988 this.set("with", cte) 2989 else: 2990 self.raise_error(f"{this.key} does not support CTE") 2991 this = cte 2992 2993 return this 2994 2995 # duckdb supports leading with FROM x 2996 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2997 2998 if self._match(TokenType.SELECT): 2999 comments = self._prev_comments 3000 3001 hint = self._parse_hint() 3002 3003 if self._next and not self._next.token_type == TokenType.DOT: 3004 all_ = self._match(TokenType.ALL) 3005 distinct = self._match_set(self.DISTINCT_TOKENS) 3006 else: 3007 all_, distinct = None, None 3008 3009 kind = ( 3010 self._match(TokenType.ALIAS) 3011 and self._match_texts(("STRUCT", "VALUE")) 3012 and self._prev.text.upper() 3013 ) 3014 3015 if distinct: 3016 distinct = self.expression( 3017 exp.Distinct, 3018 on=self._parse_value() if self._match(TokenType.ON) else None, 3019 ) 3020 3021 if all_ and distinct: 3022 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3023 3024 operation_modifiers = [] 3025 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3026 operation_modifiers.append(exp.var(self._prev.text.upper())) 3027 3028 limit = self._parse_limit(top=True) 3029 projections = self._parse_projections() 3030 3031 this = self.expression( 3032 exp.Select, 3033 kind=kind, 3034 hint=hint, 3035 distinct=distinct, 3036 expressions=projections, 3037 limit=limit, 3038 operation_modifiers=operation_modifiers or None, 3039 ) 3040 this.comments = comments 3041 3042 into = self._parse_into() 3043 if into: 3044 this.set("into", into) 3045 3046 if not from_: 3047 from_ = self._parse_from() 3048 3049 if from_: 3050 this.set("from", from_) 3051 3052 this = self._parse_query_modifiers(this) 3053 elif (table or nested) and self._match(TokenType.L_PAREN): 3054 if self._match(TokenType.PIVOT): 3055 this = self._parse_simplified_pivot() 3056 elif self._match(TokenType.FROM): 3057 this = exp.select("*").from_( 3058 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3059 ) 3060 else: 3061 this = ( 3062 self._parse_table() 3063 if table 3064 else self._parse_select(nested=True, parse_set_operation=False) 3065 ) 3066 3067 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3068 # in case a modifier (e.g. 
join) is following 3069 if table and isinstance(this, exp.Values) and this.alias: 3070 alias = this.args["alias"].pop() 3071 this = exp.Table(this=this, alias=alias) 3072 3073 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3074 3075 self._match_r_paren() 3076 3077 # We return early here so that the UNION isn't attached to the subquery by the 3078 # following call to _parse_set_operations, but instead becomes the parent node 3079 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3080 elif self._match(TokenType.VALUES, advance=False): 3081 this = self._parse_derived_table_values() 3082 elif from_: 3083 this = exp.select("*").from_(from_.this, copy=False) 3084 elif self._match(TokenType.SUMMARIZE): 3085 table = self._match(TokenType.TABLE) 3086 this = self._parse_select() or self._parse_string() or self._parse_table() 3087 return self.expression(exp.Summarize, this=this, table=table) 3088 elif self._match(TokenType.DESCRIBE): 3089 this = self._parse_describe() 3090 elif self._match_text_seq("STREAM"): 3091 this = self._parse_function() 3092 if this: 3093 this = self.expression(exp.Stream, this=this) 3094 else: 3095 self._retreat(self._index - 1) 3096 else: 3097 this = None 3098 3099 return self._parse_set_operations(this) if parse_set_operation else this 3100 3101 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3102 if not skip_with_token and not self._match(TokenType.WITH): 3103 return None 3104 3105 comments = self._prev_comments 3106 recursive = self._match(TokenType.RECURSIVE) 3107 3108 last_comments = None 3109 expressions = [] 3110 while True: 3111 expressions.append(self._parse_cte()) 3112 if last_comments: 3113 expressions[-1].add_comments(last_comments) 3114 3115 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3116 break 3117 else: 3118 self._match(TokenType.WITH) 3119 3120 last_comments = self._prev_comments 3121 3122 return self.expression( 3123 exp.With, comments=comments, expressions=expressions, recursive=recursive 3124 ) 3125 3126 def _parse_cte(self) -> exp.CTE: 3127 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3128 if not alias or not alias.this: 3129 self.raise_error("Expected CTE to have alias") 3130 3131 self._match(TokenType.ALIAS) 3132 comments = self._prev_comments 3133 3134 if self._match_text_seq("NOT", "MATERIALIZED"): 3135 materialized = False 3136 elif self._match_text_seq("MATERIALIZED"): 3137 materialized = True 3138 else: 3139 materialized = None 3140 3141 return self.expression( 3142 exp.CTE, 3143 this=self._parse_wrapped(self._parse_statement), 3144 alias=alias, 3145 materialized=materialized, 3146 comments=comments, 3147 ) 3148 3149 def _parse_table_alias( 3150 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3151 ) -> t.Optional[exp.TableAlias]: 3152 any_token = self._match(TokenType.ALIAS) 3153 alias = ( 3154 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3155 or self._parse_string_as_identifier() 3156 ) 3157 3158 index = self._index 3159 if self._match(TokenType.L_PAREN): 3160 columns = self._parse_csv(self._parse_function_parameter) 3161 self._match_r_paren() if columns else self._retreat(index) 3162 else: 3163 columns = None 3164 3165 if not alias and not columns: 3166 return None 3167 3168 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3169 3170 # We bubble up comments from the Identifier to the TableAlias 3171 if isinstance(alias, exp.Identifier): 3172 
table_alias.add_comments(alias.pop_comments()) 3173 3174 return table_alias 3175 3176 def _parse_subquery( 3177 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3178 ) -> t.Optional[exp.Subquery]: 3179 if not this: 3180 return None 3181 3182 return self.expression( 3183 exp.Subquery, 3184 this=this, 3185 pivots=self._parse_pivots(), 3186 alias=self._parse_table_alias() if parse_alias else None, 3187 sample=self._parse_table_sample(), 3188 ) 3189 3190 def _implicit_unnests_to_explicit(self, this: E) -> E: 3191 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3192 3193 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3194 for i, join in enumerate(this.args.get("joins") or []): 3195 table = join.this 3196 normalized_table = table.copy() 3197 normalized_table.meta["maybe_column"] = True 3198 normalized_table = _norm(normalized_table, dialect=self.dialect) 3199 3200 if isinstance(table, exp.Table) and not join.args.get("on"): 3201 if normalized_table.parts[0].name in refs: 3202 table_as_column = table.to_column() 3203 unnest = exp.Unnest(expressions=[table_as_column]) 3204 3205 # Table.to_column creates a parent Alias node that we want to convert to 3206 # a TableAlias and attach to the Unnest, so it matches the parser's output 3207 if isinstance(table.args.get("alias"), exp.TableAlias): 3208 table_as_column.replace(table_as_column.this) 3209 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3210 3211 table.replace(unnest) 3212 3213 refs.add(normalized_table.alias_or_name) 3214 3215 return this 3216 3217 def _parse_query_modifiers( 3218 self, this: t.Optional[exp.Expression] 3219 ) -> t.Optional[exp.Expression]: 3220 if isinstance(this, (exp.Query, exp.Table)): 3221 for join in self._parse_joins(): 3222 this.append("joins", join) 3223 for lateral in iter(self._parse_lateral, None): 3224 this.append("laterals", lateral) 3225 3226 while True: 3227 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3228 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3229 key, expression = parser(self) 3230 3231 if expression: 3232 this.set(key, expression) 3233 if key == "limit": 3234 offset = expression.args.pop("offset", None) 3235 3236 if offset: 3237 offset = exp.Offset(expression=offset) 3238 this.set("offset", offset) 3239 3240 limit_by_expressions = expression.expressions 3241 expression.set("expressions", None) 3242 offset.set("expressions", limit_by_expressions) 3243 continue 3244 break 3245 3246 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3247 this = self._implicit_unnests_to_explicit(this) 3248 3249 return this 3250 3251 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3252 start = self._curr 3253 while self._curr: 3254 self._advance() 3255 3256 end = self._tokens[self._index - 1] 3257 return exp.Hint(expressions=[self._find_sql(start, end)]) 3258 3259 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3260 return self._parse_function_call() 3261 3262 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3263 start_index = self._index 3264 should_fallback_to_string = False 3265 3266 hints = [] 3267 try: 3268 for hint in iter( 3269 lambda: self._parse_csv( 3270 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3271 ), 3272 [], 3273 ): 3274 hints.extend(hint) 3275 except ParseError: 3276 should_fallback_to_string = True 3277 3278 if should_fallback_to_string or self._curr: 3279 
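            # Unconsumed tokens mean the hint body wasn't fully parsed as
            # function calls/vars, so rewind and capture the raw text verbatim.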
self._retreat(start_index) 3280 return self._parse_hint_fallback_to_string() 3281 3282 return self.expression(exp.Hint, expressions=hints) 3283 3284 def _parse_hint(self) -> t.Optional[exp.Hint]: 3285 if self._match(TokenType.HINT) and self._prev_comments: 3286 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3287 3288 return None 3289 3290 def _parse_into(self) -> t.Optional[exp.Into]: 3291 if not self._match(TokenType.INTO): 3292 return None 3293 3294 temp = self._match(TokenType.TEMPORARY) 3295 unlogged = self._match_text_seq("UNLOGGED") 3296 self._match(TokenType.TABLE) 3297 3298 return self.expression( 3299 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3300 ) 3301 3302 def _parse_from( 3303 self, joins: bool = False, skip_from_token: bool = False 3304 ) -> t.Optional[exp.From]: 3305 if not skip_from_token and not self._match(TokenType.FROM): 3306 return None 3307 3308 return self.expression( 3309 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3310 ) 3311 3312 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3313 return self.expression( 3314 exp.MatchRecognizeMeasure, 3315 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3316 this=self._parse_expression(), 3317 ) 3318 3319 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3320 if not self._match(TokenType.MATCH_RECOGNIZE): 3321 return None 3322 3323 self._match_l_paren() 3324 3325 partition = self._parse_partition_by() 3326 order = self._parse_order() 3327 3328 measures = ( 3329 self._parse_csv(self._parse_match_recognize_measure) 3330 if self._match_text_seq("MEASURES") 3331 else None 3332 ) 3333 3334 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3335 rows = exp.var("ONE ROW PER MATCH") 3336 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3337 text = "ALL ROWS PER MATCH" 3338 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3339 text += " SHOW EMPTY MATCHES" 3340 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3341 text += " OMIT EMPTY MATCHES" 3342 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3343 text += " WITH UNMATCHED ROWS" 3344 rows = exp.var(text) 3345 else: 3346 rows = None 3347 3348 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3349 text = "AFTER MATCH SKIP" 3350 if self._match_text_seq("PAST", "LAST", "ROW"): 3351 text += " PAST LAST ROW" 3352 elif self._match_text_seq("TO", "NEXT", "ROW"): 3353 text += " TO NEXT ROW" 3354 elif self._match_text_seq("TO", "FIRST"): 3355 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3356 elif self._match_text_seq("TO", "LAST"): 3357 text += f" TO LAST {self._advance_any().text}" # type: ignore 3358 after = exp.var(text) 3359 else: 3360 after = None 3361 3362 if self._match_text_seq("PATTERN"): 3363 self._match_l_paren() 3364 3365 if not self._curr: 3366 self.raise_error("Expecting )", self._curr) 3367 3368 paren = 1 3369 start = self._curr 3370 3371 while self._curr and paren > 0: 3372 if self._curr.token_type == TokenType.L_PAREN: 3373 paren += 1 3374 if self._curr.token_type == TokenType.R_PAREN: 3375 paren -= 1 3376 3377 end = self._prev 3378 self._advance() 3379 3380 if paren > 0: 3381 self.raise_error("Expecting )", self._curr) 3382 3383 pattern = exp.var(self._find_sql(start, end)) 3384 else: 3385 pattern = None 3386 3387 define = ( 3388 self._parse_csv(self._parse_name_as_expression) 3389 if self._match_text_seq("DEFINE") 3390 else None 3391 ) 3392 3393 
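        # (Added example) Every MATCH_RECOGNIZE sub-clause has now been consumed,
        # e.g. MATCH_RECOGNIZE (PARTITION BY sym ORDER BY ts PATTERN (A B*)
        # DEFINE B AS price < PREV(price)) -- only the closing paren remains.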
self._match_r_paren() 3394 3395 return self.expression( 3396 exp.MatchRecognize, 3397 partition_by=partition, 3398 order=order, 3399 measures=measures, 3400 rows=rows, 3401 after=after, 3402 pattern=pattern, 3403 define=define, 3404 alias=self._parse_table_alias(), 3405 ) 3406 3407 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3408 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3409 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3410 cross_apply = False 3411 3412 if cross_apply is not None: 3413 this = self._parse_select(table=True) 3414 view = None 3415 outer = None 3416 elif self._match(TokenType.LATERAL): 3417 this = self._parse_select(table=True) 3418 view = self._match(TokenType.VIEW) 3419 outer = self._match(TokenType.OUTER) 3420 else: 3421 return None 3422 3423 if not this: 3424 this = ( 3425 self._parse_unnest() 3426 or self._parse_function() 3427 or self._parse_id_var(any_token=False) 3428 ) 3429 3430 while self._match(TokenType.DOT): 3431 this = exp.Dot( 3432 this=this, 3433 expression=self._parse_function() or self._parse_id_var(any_token=False), 3434 ) 3435 3436 if view: 3437 table = self._parse_id_var(any_token=False) 3438 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3439 table_alias: t.Optional[exp.TableAlias] = self.expression( 3440 exp.TableAlias, this=table, columns=columns 3441 ) 3442 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3443 # We move the alias from the lateral's child node to the lateral itself 3444 table_alias = this.args["alias"].pop() 3445 else: 3446 table_alias = self._parse_table_alias() 3447 3448 return self.expression( 3449 exp.Lateral, 3450 this=this, 3451 view=view, 3452 outer=outer, 3453 alias=table_alias, 3454 cross_apply=cross_apply, 3455 ) 3456 3457 def _parse_join_parts( 3458 self, 3459 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3460 return ( 3461 self._match_set(self.JOIN_METHODS) and self._prev, 3462 self._match_set(self.JOIN_SIDES) and self._prev, 3463 self._match_set(self.JOIN_KINDS) and self._prev, 3464 ) 3465 3466 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3467 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3468 this = self._parse_column() 3469 if isinstance(this, exp.Column): 3470 return this.this 3471 return this 3472 3473 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3474 3475 def _parse_join( 3476 self, skip_join_token: bool = False, parse_bracket: bool = False 3477 ) -> t.Optional[exp.Join]: 3478 if self._match(TokenType.COMMA): 3479 return self.expression(exp.Join, this=self._parse_table()) 3480 3481 index = self._index 3482 method, side, kind = self._parse_join_parts() 3483 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3484 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3485 3486 if not skip_join_token and not join: 3487 self._retreat(index) 3488 kind = None 3489 method = None 3490 side = None 3491 3492 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3493 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3494 3495 if not skip_join_token and not join and not outer_apply and not cross_apply: 3496 return None 3497 3498 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3499 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3500 kwargs["expressions"] = 
self._parse_csv( 3501 lambda: self._parse_table(parse_bracket=parse_bracket) 3502 ) 3503 3504 if method: 3505 kwargs["method"] = method.text 3506 if side: 3507 kwargs["side"] = side.text 3508 if kind: 3509 kwargs["kind"] = kind.text 3510 if hint: 3511 kwargs["hint"] = hint 3512 3513 if self._match(TokenType.MATCH_CONDITION): 3514 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3515 3516 if self._match(TokenType.ON): 3517 kwargs["on"] = self._parse_assignment() 3518 elif self._match(TokenType.USING): 3519 kwargs["using"] = self._parse_using_identifiers() 3520 elif ( 3521 not (outer_apply or cross_apply) 3522 and not isinstance(kwargs["this"], exp.Unnest) 3523 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3524 ): 3525 index = self._index 3526 joins: t.Optional[list] = list(self._parse_joins()) 3527 3528 if joins and self._match(TokenType.ON): 3529 kwargs["on"] = self._parse_assignment() 3530 elif joins and self._match(TokenType.USING): 3531 kwargs["using"] = self._parse_using_identifiers() 3532 else: 3533 joins = None 3534 self._retreat(index) 3535 3536 kwargs["this"].set("joins", joins if joins else None) 3537 3538 comments = [c for token in (method, side, kind) if token for c in token.comments] 3539 return self.expression(exp.Join, comments=comments, **kwargs) 3540 3541 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3542 this = self._parse_assignment() 3543 3544 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3545 return this 3546 3547 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3548 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3549 3550 return this 3551 3552 def _parse_index_params(self) -> exp.IndexParameters: 3553 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3554 3555 if self._match(TokenType.L_PAREN, advance=False): 3556 columns = self._parse_wrapped_csv(self._parse_with_operator) 3557 else: 3558 columns = None 3559 3560 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3561 partition_by = self._parse_partition_by() 3562 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3563 tablespace = ( 3564 self._parse_var(any_token=True) 3565 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3566 else None 3567 ) 3568 where = self._parse_where() 3569 3570 on = self._parse_field() if self._match(TokenType.ON) else None 3571 3572 return self.expression( 3573 exp.IndexParameters, 3574 using=using, 3575 columns=columns, 3576 include=include, 3577 partition_by=partition_by, 3578 where=where, 3579 with_storage=with_storage, 3580 tablespace=tablespace, 3581 on=on, 3582 ) 3583 3584 def _parse_index( 3585 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3586 ) -> t.Optional[exp.Index]: 3587 if index or anonymous: 3588 unique = None 3589 primary = None 3590 amp = None 3591 3592 self._match(TokenType.ON) 3593 self._match(TokenType.TABLE) # hive 3594 table = self._parse_table_parts(schema=True) 3595 else: 3596 unique = self._match(TokenType.UNIQUE) 3597 primary = self._match_text_seq("PRIMARY") 3598 amp = self._match_text_seq("AMP") 3599 3600 if not self._match(TokenType.INDEX): 3601 return None 3602 3603 index = self._parse_id_var() 3604 table = None 3605 3606 params = self._parse_index_params() 3607 3608 return self.expression( 3609 exp.Index, 3610 this=index, 3611 table=table, 3612 unique=unique, 3613 primary=primary, 3614 amp=amp, 3615 
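# A sketch of the node produced for a plain CREATE INDEX statement
# (illustrative; assumes the public sqlglot.parse_one entry point):
#
#   >>> import sqlglot
#   >>> ast = sqlglot.parse_one("CREATE INDEX idx ON t (a, b)")
#   >>> ast.find(sqlglot.exp.Index) is not None
#   True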
params=params, 3616 ) 3617 3618 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3619 hints: t.List[exp.Expression] = [] 3620 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3621 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3622 hints.append( 3623 self.expression( 3624 exp.WithTableHint, 3625 expressions=self._parse_csv( 3626 lambda: self._parse_function() or self._parse_var(any_token=True) 3627 ), 3628 ) 3629 ) 3630 self._match_r_paren() 3631 else: 3632 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3633 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3634 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3635 3636 self._match_set((TokenType.INDEX, TokenType.KEY)) 3637 if self._match(TokenType.FOR): 3638 hint.set("target", self._advance_any() and self._prev.text.upper()) 3639 3640 hint.set("expressions", self._parse_wrapped_id_vars()) 3641 hints.append(hint) 3642 3643 return hints or None 3644 3645 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3646 return ( 3647 (not schema and self._parse_function(optional_parens=False)) 3648 or self._parse_id_var(any_token=False) 3649 or self._parse_string_as_identifier() 3650 or self._parse_placeholder() 3651 ) 3652 3653 def _parse_table_parts( 3654 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3655 ) -> exp.Table: 3656 catalog = None 3657 db = None 3658 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3659 3660 while self._match(TokenType.DOT): 3661 if catalog: 3662 # This allows nesting the table in arbitrarily many dot expressions if needed 3663 table = self.expression( 3664 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3665 ) 3666 else: 3667 catalog = db 3668 db = table 3669 # "" used for tsql FROM a..b case 3670 table = self._parse_table_part(schema=schema) or "" 3671 3672 if ( 3673 wildcard 3674 and self._is_connected() 3675 and (isinstance(table, exp.Identifier) or not table) 3676 and self._match(TokenType.STAR) 3677 ): 3678 if isinstance(table, exp.Identifier): 3679 table.args["this"] += "*" 3680 else: 3681 table = exp.Identifier(this="*") 3682 3683 # We bubble up comments from the Identifier to the Table 3684 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3685 3686 if is_db_reference: 3687 catalog = db 3688 db = table 3689 table = None 3690 3691 if not table and not is_db_reference: 3692 self.raise_error(f"Expected table name but got {self._curr}") 3693 if not db and is_db_reference: 3694 self.raise_error(f"Expected database name but got {self._curr}") 3695 3696 table = self.expression( 3697 exp.Table, 3698 comments=comments, 3699 this=table, 3700 db=db, 3701 catalog=catalog, 3702 ) 3703 3704 changes = self._parse_changes() 3705 if changes: 3706 table.set("changes", changes) 3707 3708 at_before = self._parse_historical_data() 3709 if at_before: 3710 table.set("when", at_before) 3711 3712 pivots = self._parse_pivots() 3713 if pivots: 3714 table.set("pivots", pivots) 3715 3716 return table 3717 3718 def _parse_table( 3719 self, 3720 schema: bool = False, 3721 joins: bool = False, 3722 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3723 parse_bracket: bool = False, 3724 is_db_reference: bool = False, 3725 parse_partition: bool = False, 3726 ) -> t.Optional[exp.Expression]: 3727 lateral = self._parse_lateral() 3728 if lateral: 3729 return lateral 3730 3731 unnest = 
self._parse_unnest() 3732 if unnest: 3733 return unnest 3734 3735 values = self._parse_derived_table_values() 3736 if values: 3737 return values 3738 3739 subquery = self._parse_select(table=True) 3740 if subquery: 3741 if not subquery.args.get("pivots"): 3742 subquery.set("pivots", self._parse_pivots()) 3743 return subquery 3744 3745 bracket = parse_bracket and self._parse_bracket(None) 3746 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3747 3748 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3749 self._parse_table 3750 ) 3751 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3752 3753 only = self._match(TokenType.ONLY) 3754 3755 this = t.cast( 3756 exp.Expression, 3757 bracket 3758 or rows_from 3759 or self._parse_bracket( 3760 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3761 ), 3762 ) 3763 3764 if only: 3765 this.set("only", only) 3766 3767 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3768 self._match_text_seq("*") 3769 3770 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3771 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3772 this.set("partition", self._parse_partition()) 3773 3774 if schema: 3775 return self._parse_schema(this=this) 3776 3777 version = self._parse_version() 3778 3779 if version: 3780 this.set("version", version) 3781 3782 if self.dialect.ALIAS_POST_TABLESAMPLE: 3783 this.set("sample", self._parse_table_sample()) 3784 3785 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3786 if alias: 3787 this.set("alias", alias) 3788 3789 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3790 return self.expression( 3791 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3792 ) 3793 3794 this.set("hints", self._parse_table_hints()) 3795 3796 if not this.args.get("pivots"): 3797 this.set("pivots", self._parse_pivots()) 3798 3799 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3800 this.set("sample", self._parse_table_sample()) 3801 3802 if joins: 3803 for join in self._parse_joins(): 3804 this.append("joins", join) 3805 3806 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3807 this.set("ordinality", True) 3808 this.set("alias", self._parse_table_alias()) 3809 3810 return this 3811 3812 def _parse_version(self) -> t.Optional[exp.Version]: 3813 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3814 this = "TIMESTAMP" 3815 elif self._match(TokenType.VERSION_SNAPSHOT): 3816 this = "VERSION" 3817 else: 3818 return None 3819 3820 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3821 kind = self._prev.text.upper() 3822 start = self._parse_bitwise() 3823 self._match_texts(("TO", "AND")) 3824 end = self._parse_bitwise() 3825 expression: t.Optional[exp.Expression] = self.expression( 3826 exp.Tuple, expressions=[start, end] 3827 ) 3828 elif self._match_text_seq("CONTAINED", "IN"): 3829 kind = "CONTAINED IN" 3830 expression = self.expression( 3831 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3832 ) 3833 elif self._match(TokenType.ALL): 3834 kind = "ALL" 3835 expression = None 3836 else: 3837 self._match_text_seq("AS", "OF") 3838 kind = "AS OF" 3839 expression = self._parse_type() 3840 3841 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3842 3843 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3844 # 
https://docs.snowflake.com/en/sql-reference/constructs/at-before 3845 index = self._index 3846 historical_data = None 3847 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3848 this = self._prev.text.upper() 3849 kind = ( 3850 self._match(TokenType.L_PAREN) 3851 and self._match_texts(self.HISTORICAL_DATA_KIND) 3852 and self._prev.text.upper() 3853 ) 3854 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3855 3856 if expression: 3857 self._match_r_paren() 3858 historical_data = self.expression( 3859 exp.HistoricalData, this=this, kind=kind, expression=expression 3860 ) 3861 else: 3862 self._retreat(index) 3863 3864 return historical_data 3865 3866 def _parse_changes(self) -> t.Optional[exp.Changes]: 3867 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3868 return None 3869 3870 information = self._parse_var(any_token=True) 3871 self._match_r_paren() 3872 3873 return self.expression( 3874 exp.Changes, 3875 information=information, 3876 at_before=self._parse_historical_data(), 3877 end=self._parse_historical_data(), 3878 ) 3879 3880 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3881 if not self._match(TokenType.UNNEST): 3882 return None 3883 3884 expressions = self._parse_wrapped_csv(self._parse_equality) 3885 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3886 3887 alias = self._parse_table_alias() if with_alias else None 3888 3889 if alias: 3890 if self.dialect.UNNEST_COLUMN_ONLY: 3891 if alias.args.get("columns"): 3892 self.raise_error("Unexpected extra column alias in unnest.") 3893 3894 alias.set("columns", [alias.this]) 3895 alias.set("this", None) 3896 3897 columns = alias.args.get("columns") or [] 3898 if offset and len(expressions) < len(columns): 3899 offset = columns.pop() 3900 3901 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3902 self._match(TokenType.ALIAS) 3903 offset = self._parse_id_var( 3904 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3905 ) or exp.to_identifier("offset") 3906 3907 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3908 3909 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3910 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3911 if not is_derived and not ( 3912 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3913 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3914 ): 3915 return None 3916 3917 expressions = self._parse_csv(self._parse_value) 3918 alias = self._parse_table_alias() 3919 3920 if is_derived: 3921 self._match_r_paren() 3922 3923 return self.expression( 3924 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3925 ) 3926 3927 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3928 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3929 as_modifier and self._match_text_seq("USING", "SAMPLE") 3930 ): 3931 return None 3932 3933 bucket_numerator = None 3934 bucket_denominator = None 3935 bucket_field = None 3936 percent = None 3937 size = None 3938 seed = None 3939 3940 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3941 matched_l_paren = self._match(TokenType.L_PAREN) 3942 3943 if self.TABLESAMPLE_CSV: 3944 num = None 3945 expressions = self._parse_csv(self._parse_primary) 3946 else: 3947 expressions = None 3948 num = ( 3949 self._parse_factor() 3950 if self._match(TokenType.NUMBER, advance=False) 3951 else self._parse_primary() or 
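# A sketch of a percent-based sample parse (illustrative; assumes the
# public sqlglot.parse_one entry point and the default dialect):
#
#   >>> import sqlglot
#   >>> sample = sqlglot.parse_one(
#   ...     "SELECT * FROM t TABLESAMPLE (10 PERCENT)"
#   ... ).find(sqlglot.exp.TableSample)
#   >>> sample.args.get("percent") is not None
#   True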
self._parse_placeholder() 3952 ) 3953 3954 if self._match_text_seq("BUCKET"): 3955 bucket_numerator = self._parse_number() 3956 self._match_text_seq("OUT", "OF") 3957 bucket_denominator = self._parse_number() 3958 self._match(TokenType.ON) 3959 bucket_field = self._parse_field() 3960 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3961 percent = num 3962 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3963 size = num 3964 else: 3965 percent = num 3966 3967 if matched_l_paren: 3968 self._match_r_paren() 3969 3970 if self._match(TokenType.L_PAREN): 3971 method = self._parse_var(upper=True) 3972 seed = self._match(TokenType.COMMA) and self._parse_number() 3973 self._match_r_paren() 3974 elif self._match_texts(("SEED", "REPEATABLE")): 3975 seed = self._parse_wrapped(self._parse_number) 3976 3977 if not method and self.DEFAULT_SAMPLING_METHOD: 3978 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3979 3980 return self.expression( 3981 exp.TableSample, 3982 expressions=expressions, 3983 method=method, 3984 bucket_numerator=bucket_numerator, 3985 bucket_denominator=bucket_denominator, 3986 bucket_field=bucket_field, 3987 percent=percent, 3988 size=size, 3989 seed=seed, 3990 ) 3991 3992 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3993 return list(iter(self._parse_pivot, None)) or None 3994 3995 def _parse_joins(self) -> t.Iterator[exp.Join]: 3996 return iter(self._parse_join, None) 3997 3998 # https://duckdb.org/docs/sql/statements/pivot 3999 def _parse_simplified_pivot(self) -> exp.Pivot: 4000 def _parse_on() -> t.Optional[exp.Expression]: 4001 this = self._parse_bitwise() 4002 return self._parse_in(this) if self._match(TokenType.IN) else this 4003 4004 this = self._parse_table() 4005 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4006 using = self._match(TokenType.USING) and self._parse_csv( 4007 lambda: self._parse_alias(self._parse_function()) 4008 ) 4009 group = self._parse_group() 4010 return self.expression( 4011 exp.Pivot, this=this, expressions=expressions, using=using, group=group 4012 ) 4013 4014 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4015 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4016 this = self._parse_select_or_expression() 4017 4018 self._match(TokenType.ALIAS) 4019 alias = self._parse_bitwise() 4020 if alias: 4021 if isinstance(alias, exp.Column) and not alias.db: 4022 alias = alias.this 4023 return self.expression(exp.PivotAlias, this=this, alias=alias) 4024 4025 return this 4026 4027 value = self._parse_column() 4028 4029 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4030 self.raise_error("Expecting IN (") 4031 4032 if self._match(TokenType.ANY): 4033 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4034 else: 4035 exprs = self._parse_csv(_parse_aliased_expression) 4036 4037 self._match_r_paren() 4038 return self.expression(exp.In, this=value, expressions=exprs) 4039 4040 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4041 index = self._index 4042 include_nulls = None 4043 4044 if self._match(TokenType.PIVOT): 4045 unpivot = False 4046 elif self._match(TokenType.UNPIVOT): 4047 unpivot = True 4048 4049 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4050 if self._match_text_seq("INCLUDE", "NULLS"): 4051 include_nulls = True 4052 elif self._match_text_seq("EXCLUDE", "NULLS"): 4053 include_nulls = False 4054 else: 4055 return None 4056 4057 expressions 
= [] 4058 4059 if not self._match(TokenType.L_PAREN): 4060 self._retreat(index) 4061 return None 4062 4063 if unpivot: 4064 expressions = self._parse_csv(self._parse_column) 4065 else: 4066 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4067 4068 if not expressions: 4069 self.raise_error("Failed to parse PIVOT's aggregation list") 4070 4071 if not self._match(TokenType.FOR): 4072 self.raise_error("Expecting FOR") 4073 4074 field = self._parse_pivot_in() 4075 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4076 self._parse_bitwise 4077 ) 4078 4079 self._match_r_paren() 4080 4081 pivot = self.expression( 4082 exp.Pivot, 4083 expressions=expressions, 4084 field=field, 4085 unpivot=unpivot, 4086 include_nulls=include_nulls, 4087 default_on_null=default_on_null, 4088 ) 4089 4090 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4091 pivot.set("alias", self._parse_table_alias()) 4092 4093 if not unpivot: 4094 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4095 4096 columns: t.List[exp.Expression] = [] 4097 for fld in pivot.args["field"].expressions: 4098 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4099 for name in names: 4100 if self.PREFIXED_PIVOT_COLUMNS: 4101 name = f"{name}_{field_name}" if name else field_name 4102 else: 4103 name = f"{field_name}_{name}" if name else field_name 4104 4105 columns.append(exp.to_identifier(name)) 4106 4107 pivot.set("columns", columns) 4108 4109 return pivot 4110 4111 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4112 return [agg.alias for agg in aggregations] 4113 4114 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4115 if not skip_where_token and not self._match(TokenType.PREWHERE): 4116 return None 4117 4118 return self.expression( 4119 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4120 ) 4121 4122 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4123 if not skip_where_token and not self._match(TokenType.WHERE): 4124 return None 4125 4126 return self.expression( 4127 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4128 ) 4129 4130 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4131 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4132 return None 4133 4134 elements: t.Dict[str, t.Any] = defaultdict(list) 4135 4136 if self._match(TokenType.ALL): 4137 elements["all"] = True 4138 elif self._match(TokenType.DISTINCT): 4139 elements["all"] = False 4140 4141 while True: 4142 index = self._index 4143 4144 elements["expressions"].extend( 4145 self._parse_csv( 4146 lambda: None 4147 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4148 else self._parse_assignment() 4149 ) 4150 ) 4151 4152 before_with_index = self._index 4153 with_prefix = self._match(TokenType.WITH) 4154 4155 if self._match(TokenType.ROLLUP): 4156 elements["rollup"].append( 4157 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4158 ) 4159 elif self._match(TokenType.CUBE): 4160 elements["cube"].append( 4161 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4162 ) 4163 elif self._match(TokenType.GROUPING_SETS): 4164 elements["grouping_sets"].append( 4165 self.expression( 4166 exp.GroupingSets, 4167 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4168 ) 4169 ) 
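# A sketch of a ROLLUP group ending up in the "rollup" bucket above
# (illustrative; assumes the public sqlglot.parse_one entry point):
#
#   >>> import sqlglot
#   >>> group = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").args["group"]
#   >>> len(group.args["rollup"])
#   1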
4170 elif self._match_text_seq("TOTALS"): 4171 elements["totals"] = True # type: ignore 4172 4173 if before_with_index <= self._index <= before_with_index + 1: 4174 self._retreat(before_with_index) 4175 break 4176 4177 if index == self._index: 4178 break 4179 4180 return self.expression(exp.Group, **elements) # type: ignore 4181 4182 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4183 return self.expression( 4184 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4185 ) 4186 4187 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4188 if self._match(TokenType.L_PAREN): 4189 grouping_set = self._parse_csv(self._parse_column) 4190 self._match_r_paren() 4191 return self.expression(exp.Tuple, expressions=grouping_set) 4192 4193 return self._parse_column() 4194 4195 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4196 if not skip_having_token and not self._match(TokenType.HAVING): 4197 return None 4198 return self.expression(exp.Having, this=self._parse_assignment()) 4199 4200 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4201 if not self._match(TokenType.QUALIFY): 4202 return None 4203 return self.expression(exp.Qualify, this=self._parse_assignment()) 4204 4205 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4206 if skip_start_token: 4207 start = None 4208 elif self._match(TokenType.START_WITH): 4209 start = self._parse_assignment() 4210 else: 4211 return None 4212 4213 self._match(TokenType.CONNECT_BY) 4214 nocycle = self._match_text_seq("NOCYCLE") 4215 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4216 exp.Prior, this=self._parse_bitwise() 4217 ) 4218 connect = self._parse_assignment() 4219 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4220 4221 if not start and self._match(TokenType.START_WITH): 4222 start = self._parse_assignment() 4223 4224 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4225 4226 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4227 this = self._parse_id_var(any_token=True) 4228 if self._match(TokenType.ALIAS): 4229 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4230 return this 4231 4232 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4233 if self._match_text_seq("INTERPOLATE"): 4234 return self._parse_wrapped_csv(self._parse_name_as_expression) 4235 return None 4236 4237 def _parse_order( 4238 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4239 ) -> t.Optional[exp.Expression]: 4240 siblings = None 4241 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4242 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4243 return this 4244 4245 siblings = True 4246 4247 return self.expression( 4248 exp.Order, 4249 this=this, 4250 expressions=self._parse_csv(self._parse_ordered), 4251 siblings=siblings, 4252 ) 4253 4254 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4255 if not self._match(token): 4256 return None 4257 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4258 4259 def _parse_ordered( 4260 self, parse_method: t.Optional[t.Callable] = None 4261 ) -> t.Optional[exp.Ordered]: 4262 this = parse_method() if parse_method else self._parse_assignment() 4263 if not this: 4264 return None 4265 4266 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4267 this = 
exp.var("ALL") 4268 4269 asc = self._match(TokenType.ASC) 4270 desc = self._match(TokenType.DESC) or (asc and False) 4271 4272 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4273 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4274 4275 nulls_first = is_nulls_first or False 4276 explicitly_null_ordered = is_nulls_first or is_nulls_last 4277 4278 if ( 4279 not explicitly_null_ordered 4280 and ( 4281 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4282 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4283 ) 4284 and self.dialect.NULL_ORDERING != "nulls_are_last" 4285 ): 4286 nulls_first = True 4287 4288 if self._match_text_seq("WITH", "FILL"): 4289 with_fill = self.expression( 4290 exp.WithFill, 4291 **{ # type: ignore 4292 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4293 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4294 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4295 "interpolate": self._parse_interpolate(), 4296 }, 4297 ) 4298 else: 4299 with_fill = None 4300 4301 return self.expression( 4302 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4303 ) 4304 4305 def _parse_limit( 4306 self, 4307 this: t.Optional[exp.Expression] = None, 4308 top: bool = False, 4309 skip_limit_token: bool = False, 4310 ) -> t.Optional[exp.Expression]: 4311 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4312 comments = self._prev_comments 4313 if top: 4314 limit_paren = self._match(TokenType.L_PAREN) 4315 expression = self._parse_term() if limit_paren else self._parse_number() 4316 4317 if limit_paren: 4318 self._match_r_paren() 4319 else: 4320 expression = self._parse_term() 4321 4322 if self._match(TokenType.COMMA): 4323 offset = expression 4324 expression = self._parse_term() 4325 else: 4326 offset = None 4327 4328 limit_exp = self.expression( 4329 exp.Limit, 4330 this=this, 4331 expression=expression, 4332 offset=offset, 4333 comments=comments, 4334 expressions=self._parse_limit_by(), 4335 ) 4336 4337 return limit_exp 4338 4339 if self._match(TokenType.FETCH): 4340 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4341 direction = self._prev.text.upper() if direction else "FIRST" 4342 4343 count = self._parse_field(tokens=self.FETCH_TOKENS) 4344 percent = self._match(TokenType.PERCENT) 4345 4346 self._match_set((TokenType.ROW, TokenType.ROWS)) 4347 4348 only = self._match_text_seq("ONLY") 4349 with_ties = self._match_text_seq("WITH", "TIES") 4350 4351 if only and with_ties: 4352 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4353 4354 return self.expression( 4355 exp.Fetch, 4356 direction=direction, 4357 count=count, 4358 percent=percent, 4359 with_ties=with_ties, 4360 ) 4361 4362 return this 4363 4364 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4365 if not self._match(TokenType.OFFSET): 4366 return this 4367 4368 count = self._parse_term() 4369 self._match_set((TokenType.ROW, TokenType.ROWS)) 4370 4371 return self.expression( 4372 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4373 ) 4374 4375 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4376 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4377 4378 def _parse_locks(self) -> t.List[exp.Lock]: 4379 locks = [] 4380 while True: 4381 if self._match_text_seq("FOR", "UPDATE"): 4382 update = True 4383 elif self._match_text_seq("FOR", "SHARE") 
or self._match_text_seq( 4384 "LOCK", "IN", "SHARE", "MODE" 4385 ): 4386 update = False 4387 else: 4388 break 4389 4390 expressions = None 4391 if self._match_text_seq("OF"): 4392 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4393 4394 wait: t.Optional[bool | exp.Expression] = None 4395 if self._match_text_seq("NOWAIT"): 4396 wait = True 4397 elif self._match_text_seq("WAIT"): 4398 wait = self._parse_primary() 4399 elif self._match_text_seq("SKIP", "LOCKED"): 4400 wait = False 4401 4402 locks.append( 4403 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4404 ) 4405 4406 return locks 4407 4408 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4409 while this and self._match_set(self.SET_OPERATIONS): 4410 token_type = self._prev.token_type 4411 4412 if token_type == TokenType.UNION: 4413 operation: t.Type[exp.SetOperation] = exp.Union 4414 elif token_type == TokenType.EXCEPT: 4415 operation = exp.Except 4416 else: 4417 operation = exp.Intersect 4418 4419 comments = self._prev.comments 4420 4421 if self._match(TokenType.DISTINCT): 4422 distinct: t.Optional[bool] = True 4423 elif self._match(TokenType.ALL): 4424 distinct = False 4425 else: 4426 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4427 if distinct is None: 4428 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4429 4430 by_name = self._match_text_seq("BY", "NAME") 4431 expression = self._parse_select(nested=True, parse_set_operation=False) 4432 4433 this = self.expression( 4434 operation, 4435 comments=comments, 4436 this=this, 4437 distinct=distinct, 4438 by_name=by_name, 4439 expression=expression, 4440 ) 4441 4442 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4443 expression = this.expression 4444 4445 if expression: 4446 for arg in self.SET_OP_MODIFIERS: 4447 expr = expression.args.get(arg) 4448 if expr: 4449 this.set(arg, expr.pop()) 4450 4451 return this 4452 4453 def _parse_expression(self) -> t.Optional[exp.Expression]: 4454 return self._parse_alias(self._parse_assignment()) 4455 4456 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4457 this = self._parse_disjunction() 4458 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4459 # This allows us to parse <non-identifier token> := <expr> 4460 this = exp.column( 4461 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4462 ) 4463 4464 while self._match_set(self.ASSIGNMENT): 4465 if isinstance(this, exp.Column) and len(this.parts) == 1: 4466 this = this.this 4467 4468 this = self.expression( 4469 self.ASSIGNMENT[self._prev.token_type], 4470 this=this, 4471 comments=self._prev_comments, 4472 expression=self._parse_assignment(), 4473 ) 4474 4475 return this 4476 4477 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4478 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4479 4480 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4481 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4482 4483 def _parse_equality(self) -> t.Optional[exp.Expression]: 4484 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4485 4486 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4487 return self._parse_tokens(self._parse_range, self.COMPARISON) 4488 4489 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4490 this = this or self._parse_bitwise() 4491 negate = 
self._match(TokenType.NOT) 4492 4493 if self._match_set(self.RANGE_PARSERS): 4494 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4495 if not expression: 4496 return this 4497 4498 this = expression 4499 elif self._match(TokenType.ISNULL): 4500 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4501 4502 # Postgres supports ISNULL and NOTNULL for conditions. 4503 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4504 if self._match(TokenType.NOTNULL): 4505 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4506 this = self.expression(exp.Not, this=this) 4507 4508 if negate: 4509 this = self._negate_range(this) 4510 4511 if self._match(TokenType.IS): 4512 this = self._parse_is(this) 4513 4514 return this 4515 4516 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4517 if not this: 4518 return this 4519 4520 return self.expression(exp.Not, this=this) 4521 4522 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4523 index = self._index - 1 4524 negate = self._match(TokenType.NOT) 4525 4526 if self._match_text_seq("DISTINCT", "FROM"): 4527 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4528 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4529 4530 if self._match(TokenType.JSON): 4531 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4532 4533 if self._match_text_seq("WITH"): 4534 _with = True 4535 elif self._match_text_seq("WITHOUT"): 4536 _with = False 4537 else: 4538 _with = None 4539 4540 unique = self._match(TokenType.UNIQUE) 4541 self._match_text_seq("KEYS") 4542 expression: t.Optional[exp.Expression] = self.expression( 4543 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4544 ) 4545 else: 4546 expression = self._parse_primary() or self._parse_null() 4547 if not expression: 4548 self._retreat(index) 4549 return None 4550 4551 this = self.expression(exp.Is, this=this, expression=expression) 4552 return self.expression(exp.Not, this=this) if negate else this 4553 4554 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4555 unnest = self._parse_unnest(with_alias=False) 4556 if unnest: 4557 this = self.expression(exp.In, this=this, unnest=unnest) 4558 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4559 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4560 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4561 4562 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4563 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4564 else: 4565 this = self.expression(exp.In, this=this, expressions=expressions) 4566 4567 if matched_l_paren: 4568 self._match_r_paren(this) 4569 elif not self._match(TokenType.R_BRACKET, expression=this): 4570 self.raise_error("Expecting ]") 4571 else: 4572 this = self.expression(exp.In, this=this, field=self._parse_column()) 4573 4574 return this 4575 4576 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4577 low = self._parse_bitwise() 4578 self._match(TokenType.AND) 4579 high = self._parse_bitwise() 4580 return self.expression(exp.Between, this=this, low=low, high=high) 4581 4582 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4583 if not self._match(TokenType.ESCAPE): 4584 return this 4585 return self.expression(exp.Escape, this=this, 
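# A sketch of the Escape node this produces for LIKE ... ESCAPE
# (illustrative; assumes the public sqlglot.parse_one entry point):
#
#   >>> import sqlglot
#   >>> expr = sqlglot.parse_one("SELECT a LIKE 'x!%' ESCAPE '!' FROM t").selects[0]
#   >>> isinstance(expr, sqlglot.exp.Escape) and isinstance(expr.this, sqlglot.exp.Like)
#   True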
expression=self._parse_string()) 4586 4587 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4588 index = self._index 4589 4590 if not self._match(TokenType.INTERVAL) and match_interval: 4591 return None 4592 4593 if self._match(TokenType.STRING, advance=False): 4594 this = self._parse_primary() 4595 else: 4596 this = self._parse_term() 4597 4598 if not this or ( 4599 isinstance(this, exp.Column) 4600 and not this.table 4601 and not this.this.quoted 4602 and this.name.upper() == "IS" 4603 ): 4604 self._retreat(index) 4605 return None 4606 4607 unit = self._parse_function() or ( 4608 not self._match(TokenType.ALIAS, advance=False) 4609 and self._parse_var(any_token=True, upper=True) 4610 ) 4611 4612 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4613 # each INTERVAL expression into this canonical form so it's easy to transpile 4614 if this and this.is_number: 4615 this = exp.Literal.string(this.to_py()) 4616 elif this and this.is_string: 4617 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4618 if len(parts) == 1: 4619 if unit: 4620 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4621 self._retreat(self._index - 1) 4622 4623 this = exp.Literal.string(parts[0][0]) 4624 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4625 4626 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4627 unit = self.expression( 4628 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4629 ) 4630 4631 interval = self.expression(exp.Interval, this=this, unit=unit) 4632 4633 index = self._index 4634 self._match(TokenType.PLUS) 4635 4636 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4637 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4638 return self.expression( 4639 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4640 ) 4641 4642 self._retreat(index) 4643 return interval 4644 4645 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4646 this = self._parse_term() 4647 4648 while True: 4649 if self._match_set(self.BITWISE): 4650 this = self.expression( 4651 self.BITWISE[self._prev.token_type], 4652 this=this, 4653 expression=self._parse_term(), 4654 ) 4655 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4656 this = self.expression( 4657 exp.DPipe, 4658 this=this, 4659 expression=self._parse_term(), 4660 safe=not self.dialect.STRICT_STRING_CONCAT, 4661 ) 4662 elif self._match(TokenType.DQMARK): 4663 this = self.expression( 4664 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4665 ) 4666 elif self._match_pair(TokenType.LT, TokenType.LT): 4667 this = self.expression( 4668 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4669 ) 4670 elif self._match_pair(TokenType.GT, TokenType.GT): 4671 this = self.expression( 4672 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4673 ) 4674 else: 4675 break 4676 4677 return this 4678 4679 def _parse_term(self) -> t.Optional[exp.Expression]: 4680 this = self._parse_factor() 4681 4682 while self._match_set(self.TERM): 4683 klass = self.TERM[self._prev.token_type] 4684 comments = self._prev_comments 4685 expression = self._parse_factor() 4686 4687 this = self.expression(klass, this=this, comments=comments, expression=expression) 4688 4689 if isinstance(this, exp.Collate): 4690 expr = this.expression 4691 4692 # Preserve collations such as pg_catalog."default" 
(Postgres) as columns, otherwise 4693 # fallback to Identifier / Var 4694 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4695 ident = expr.this 4696 if isinstance(ident, exp.Identifier): 4697 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4698 4699 return this 4700 4701 def _parse_factor(self) -> t.Optional[exp.Expression]: 4702 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4703 this = parse_method() 4704 4705 while self._match_set(self.FACTOR): 4706 klass = self.FACTOR[self._prev.token_type] 4707 comments = self._prev_comments 4708 expression = parse_method() 4709 4710 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4711 self._retreat(self._index - 1) 4712 return this 4713 4714 this = self.expression(klass, this=this, comments=comments, expression=expression) 4715 4716 if isinstance(this, exp.Div): 4717 this.args["typed"] = self.dialect.TYPED_DIVISION 4718 this.args["safe"] = self.dialect.SAFE_DIVISION 4719 4720 return this 4721 4722 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4723 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4724 4725 def _parse_unary(self) -> t.Optional[exp.Expression]: 4726 if self._match_set(self.UNARY_PARSERS): 4727 return self.UNARY_PARSERS[self._prev.token_type](self) 4728 return self._parse_at_time_zone(self._parse_type()) 4729 4730 def _parse_type( 4731 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4732 ) -> t.Optional[exp.Expression]: 4733 interval = parse_interval and self._parse_interval() 4734 if interval: 4735 return interval 4736 4737 index = self._index 4738 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4739 4740 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4741 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4742 if isinstance(data_type, exp.Cast): 4743 # This constructor can contain ops directly after it, for instance struct unnesting: 4744 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4745 return self._parse_column_ops(data_type) 4746 4747 if data_type: 4748 index2 = self._index 4749 this = self._parse_primary() 4750 4751 if isinstance(this, exp.Literal): 4752 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4753 if parser: 4754 return parser(self, this, data_type) 4755 4756 return self.expression(exp.Cast, this=this, to=data_type) 4757 4758 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4759 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4760 # 4761 # If the index difference here is greater than 1, that means the parser itself must have 4762 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4763 # 4764 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4765 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4766 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4767 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4768 # 4769 # In these cases, we don't really want to return the converted type, but instead retreat 4770 # and try to parse a Column or Identifier in the section below. 
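# A doctest-style sketch of that retreat (illustrative; assumes the public
# sqlglot.parse_one entry point and the Snowflake dialect's DECIMAL converter):
#
#   >>> import sqlglot
#   >>> expr = sqlglot.parse_one("SELECT decimal FROM t", read="snowflake").selects[0]
#   >>> isinstance(expr, sqlglot.exp.Column)  # a column named "decimal", not a type
#   True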
4771 if data_type.expressions and index2 - index > 1: 4772 self._retreat(index2) 4773 return self._parse_column_ops(data_type) 4774 4775 self._retreat(index) 4776 4777 if fallback_to_identifier: 4778 return self._parse_id_var() 4779 4780 this = self._parse_column() 4781 return this and self._parse_column_ops(this) 4782 4783 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4784 this = self._parse_type() 4785 if not this: 4786 return None 4787 4788 if isinstance(this, exp.Column) and not this.table: 4789 this = exp.var(this.name.upper()) 4790 4791 return self.expression( 4792 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4793 ) 4794 4795 def _parse_types( 4796 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4797 ) -> t.Optional[exp.Expression]: 4798 index = self._index 4799 4800 this: t.Optional[exp.Expression] = None 4801 prefix = self._match_text_seq("SYSUDTLIB", ".") 4802 4803 if not self._match_set(self.TYPE_TOKENS): 4804 identifier = allow_identifiers and self._parse_id_var( 4805 any_token=False, tokens=(TokenType.VAR,) 4806 ) 4807 if isinstance(identifier, exp.Identifier): 4808 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4809 4810 if len(tokens) != 1: 4811 self.raise_error("Unexpected identifier", self._prev) 4812 4813 if tokens[0].token_type in self.TYPE_TOKENS: 4814 self._prev = tokens[0] 4815 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4816 type_name = identifier.name 4817 4818 while self._match(TokenType.DOT): 4819 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4820 4821 this = exp.DataType.build(type_name, udt=True) 4822 else: 4823 self._retreat(self._index - 1) 4824 return None 4825 else: 4826 return None 4827 4828 type_token = self._prev.token_type 4829 4830 if type_token == TokenType.PSEUDO_TYPE: 4831 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4832 4833 if type_token == TokenType.OBJECT_IDENTIFIER: 4834 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4835 4836 # https://materialize.com/docs/sql/types/map/ 4837 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4838 key_type = self._parse_types( 4839 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4840 ) 4841 if not self._match(TokenType.FARROW): 4842 self._retreat(index) 4843 return None 4844 4845 value_type = self._parse_types( 4846 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4847 ) 4848 if not self._match(TokenType.R_BRACKET): 4849 self._retreat(index) 4850 return None 4851 4852 return exp.DataType( 4853 this=exp.DataType.Type.MAP, 4854 expressions=[key_type, value_type], 4855 nested=True, 4856 prefix=prefix, 4857 ) 4858 4859 nested = type_token in self.NESTED_TYPE_TOKENS 4860 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4861 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4862 expressions = None 4863 maybe_func = False 4864 4865 if self._match(TokenType.L_PAREN): 4866 if is_struct: 4867 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4868 elif nested: 4869 expressions = self._parse_csv( 4870 lambda: self._parse_types( 4871 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4872 ) 4873 ) 4874 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4875 this = expressions[0] 4876 this.set("nullable", True) 4877 self._match_r_paren() 4878 return this 4879 elif type_token in self.ENUM_TYPE_TOKENS: 4880 
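# ENUM_TYPE_TOKENS covers types like ClickHouse's Enum8, whose values are
# parsed as equalities here. A sketch (illustrative; assumes the public
# sqlglot.parse_one entry point and the clickhouse dialect):
#
#   >>> import sqlglot
#   >>> ast = sqlglot.parse_one("CREATE TABLE t (x Enum8('a' = 1, 'b' = 2))", read="clickhouse")
#   >>> ast.find(sqlglot.exp.DataType) is not None
#   True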
expressions = self._parse_csv(self._parse_equality) 4881 elif is_aggregate: 4882 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4883 any_token=False, tokens=(TokenType.VAR,) 4884 ) 4885 if not func_or_ident or not self._match(TokenType.COMMA): 4886 return None 4887 expressions = self._parse_csv( 4888 lambda: self._parse_types( 4889 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4890 ) 4891 ) 4892 expressions.insert(0, func_or_ident) 4893 else: 4894 expressions = self._parse_csv(self._parse_type_size) 4895 4896 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4897 if type_token == TokenType.VECTOR and len(expressions) == 2: 4898 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4899 4900 if not expressions or not self._match(TokenType.R_PAREN): 4901 self._retreat(index) 4902 return None 4903 4904 maybe_func = True 4905 4906 values: t.Optional[t.List[exp.Expression]] = None 4907 4908 if nested and self._match(TokenType.LT): 4909 if is_struct: 4910 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4911 else: 4912 expressions = self._parse_csv( 4913 lambda: self._parse_types( 4914 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4915 ) 4916 ) 4917 4918 if not self._match(TokenType.GT): 4919 self.raise_error("Expecting >") 4920 4921 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4922 values = self._parse_csv(self._parse_assignment) 4923 if not values and is_struct: 4924 values = None 4925 self._retreat(self._index - 1) 4926 else: 4927 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4928 4929 if type_token in self.TIMESTAMPS: 4930 if self._match_text_seq("WITH", "TIME", "ZONE"): 4931 maybe_func = False 4932 tz_type = ( 4933 exp.DataType.Type.TIMETZ 4934 if type_token in self.TIMES 4935 else exp.DataType.Type.TIMESTAMPTZ 4936 ) 4937 this = exp.DataType(this=tz_type, expressions=expressions) 4938 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4939 maybe_func = False 4940 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4941 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4942 maybe_func = False 4943 elif type_token == TokenType.INTERVAL: 4944 unit = self._parse_var(upper=True) 4945 if unit: 4946 if self._match_text_seq("TO"): 4947 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4948 4949 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4950 else: 4951 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4952 4953 if maybe_func and check_func: 4954 index2 = self._index 4955 peek = self._parse_string() 4956 4957 if not peek: 4958 self._retreat(index) 4959 return None 4960 4961 self._retreat(index2) 4962 4963 if not this: 4964 if self._match_text_seq("UNSIGNED"): 4965 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4966 if not unsigned_type_token: 4967 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4968 4969 type_token = unsigned_type_token or type_token 4970 4971 this = exp.DataType( 4972 this=exp.DataType.Type[type_token.value], 4973 expressions=expressions, 4974 nested=nested, 4975 prefix=prefix, 4976 ) 4977 4978 # Empty arrays/structs are allowed 4979 if values is not None: 4980 cls = exp.Struct if is_struct else exp.Array 4981 this = exp.cast(cls(expressions=values), this, copy=False) 4982 4983 elif expressions: 4984 this.set("expressions", 
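# The TIMESTAMPS branch above maps "WITH TIME ZONE" variants onto the
# dedicated *TZ types. A sketch (illustrative; assumes the public
# sqlglot.parse_one entry point and the default dialect):
#
#   >>> import sqlglot
#   >>> cast = sqlglot.parse_one("SELECT CAST(x AS TIMESTAMP WITH TIME ZONE)").find(sqlglot.exp.Cast)
#   >>> cast.to.this == sqlglot.exp.DataType.Type.TIMESTAMPTZ
#   True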
expressions) 4985 4986 # https://materialize.com/docs/sql/types/list/#type-name 4987 while self._match(TokenType.LIST): 4988 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4989 4990 index = self._index 4991 4992 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4993 matched_array = self._match(TokenType.ARRAY) 4994 4995 while self._curr: 4996 datatype_token = self._prev.token_type 4997 matched_l_bracket = self._match(TokenType.L_BRACKET) 4998 if not matched_l_bracket and not matched_array: 4999 break 5000 5001 matched_array = False 5002 values = self._parse_csv(self._parse_assignment) or None 5003 if ( 5004 values 5005 and not schema 5006 and ( 5007 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5008 ) 5009 ): 5010 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5011 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5012 self._retreat(index) 5013 break 5014 5015 this = exp.DataType( 5016 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5017 ) 5018 self._match(TokenType.R_BRACKET) 5019 5020 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5021 converter = self.TYPE_CONVERTERS.get(this.this) 5022 if converter: 5023 this = converter(t.cast(exp.DataType, this)) 5024 5025 return this 5026 5027 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5028 index = self._index 5029 5030 if ( 5031 self._curr 5032 and self._next 5033 and self._curr.token_type in self.TYPE_TOKENS 5034 and self._next.token_type in self.TYPE_TOKENS 5035 ): 5036 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5037 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5038 this = self._parse_id_var() 5039 else: 5040 this = ( 5041 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5042 or self._parse_id_var() 5043 ) 5044 5045 self._match(TokenType.COLON) 5046 5047 if ( 5048 type_required 5049 and not isinstance(this, exp.DataType) 5050 and not self._match_set(self.TYPE_TOKENS, advance=False) 5051 ): 5052 self._retreat(index) 5053 return self._parse_types() 5054 5055 return self._parse_column_def(this) 5056 5057 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5058 if not self._match_text_seq("AT", "TIME", "ZONE"): 5059 return this 5060 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5061 5062 def _parse_column(self) -> t.Optional[exp.Expression]: 5063 this = self._parse_column_reference() 5064 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5065 5066 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5067 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5068 5069 return column 5070 5071 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5072 this = self._parse_field() 5073 if ( 5074 not this 5075 and self._match(TokenType.VALUES, advance=False) 5076 and self.VALUES_FOLLOWED_BY_PAREN 5077 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5078 ): 5079 this = self._parse_id_var() 5080 5081 if isinstance(this, exp.Identifier): 5082 # We bubble up comments from the Identifier to the Column 5083 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5084 5085 return this 5086 5087 def _parse_colon_as_variant_extract( 5088 self, this: t.Optional[exp.Expression] 5089 ) -> t.Optional[exp.Expression]: 5090 casts = [] 5091 json_path = [] 5092 escape = None 5093 5094 while self._match(TokenType.COLON): 5095 start_index = self._index 5096 5097 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5098 path = self._parse_column_ops( 5099 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5100 ) 5101 5102 # The cast :: operator has a lower precedence than the extraction operator :, so 5103 # we rearrange the AST appropriately to avoid casting the JSON path 5104 while isinstance(path, exp.Cast): 5105 casts.append(path.to) 5106 path = path.this 5107 5108 if casts: 5109 dcolon_offset = next( 5110 i 5111 for i, t in enumerate(self._tokens[start_index:]) 5112 if t.token_type == TokenType.DCOLON 5113 ) 5114 end_token = self._tokens[start_index + dcolon_offset - 1] 5115 else: 5116 end_token = self._prev 5117 5118 if path: 5119 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5120 # it'll roundtrip to a string literal in GET_PATH 5121 if isinstance(path, exp.Identifier) and path.quoted: 5122 escape = True 5123 5124 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5125 5126 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5127 # Databricks transforms it back to the colon/dot notation 5128 if json_path: 5129 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5130 5131 if json_path_expr: 5132 json_path_expr.set("escape", escape) 5133 5134 this = self.expression( 5135 exp.JSONExtract, 5136 this=this, 5137 expression=json_path_expr, 5138 variant_extract=True, 5139 ) 5140 5141 while casts: 5142 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5143 5144 return this 5145 5146 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5147 return self._parse_types() 5148 5149 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5150 this = self._parse_bracket(this) 5151 5152 while self._match_set(self.COLUMN_OPERATORS): 5153 op_token = self._prev.token_type 5154 op = self.COLUMN_OPERATORS.get(op_token) 5155 5156 if op_token == TokenType.DCOLON: 5157 field = self._parse_dcolon() 5158 if not field: 5159 self.raise_error("Expected type") 5160 elif op and self._curr: 5161 field = self._parse_column_reference() or self._parse_bracket() 5162 else: 5163 field = self._parse_field(any_token=True, anonymous_func=True) 5164 5165 if isinstance(field, (exp.Func, exp.Window)) and this: 5166 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5167 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5168 this = exp.replace_tree( 5169 this, 5170 lambda n: ( 5171 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5172 if n.table 5173 else n.this 5174 ) 5175 if isinstance(n, exp.Column) 5176 else n, 5177 ) 5178 5179 if op: 5180 this = op(self, this, field) 5181 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5182 this = self.expression( 5183 exp.Column, 5184 comments=this.comments, 5185 this=field, 5186 table=this.this, 5187 db=this.args.get("table"), 5188 catalog=this.args.get("db"), 5189 ) 5190 elif isinstance(field, exp.Window): 5191 # Move the exp.Dot's to the window's function 5192 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5193 field.set("this", window_func) 5194 this = field 5195 else: 5196 this = self.expression(exp.Dot, this=this, expression=field) 5197 5198 if field and field.comments: 5199 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5200 5201 this = self._parse_bracket(this) 5202 5203 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5204 5205 def _parse_primary(self) -> t.Optional[exp.Expression]: 5206 if self._match_set(self.PRIMARY_PARSERS): 5207 token_type = self._prev.token_type 5208 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5209 5210 if token_type == TokenType.STRING: 5211 expressions = [primary] 5212 while self._match(TokenType.STRING): 5213 expressions.append(exp.Literal.string(self._prev.text)) 5214 5215 if len(expressions) > 1: 5216 return self.expression(exp.Concat, expressions=expressions) 5217 5218 return primary 5219 5220 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5221 return exp.Literal.number(f"0.{self._prev.text}") 5222 5223 if 
self._match(TokenType.L_PAREN): 5224 comments = self._prev_comments 5225 query = self._parse_select() 5226 5227 if query: 5228 expressions = [query] 5229 else: 5230 expressions = self._parse_expressions() 5231 5232 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5233 5234 if not this and self._match(TokenType.R_PAREN, advance=False): 5235 this = self.expression(exp.Tuple) 5236 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5237 this = self._parse_subquery(this=this, parse_alias=False) 5238 elif isinstance(this, exp.Subquery): 5239 this = self._parse_subquery( 5240 this=self._parse_set_operations(this), parse_alias=False 5241 ) 5242 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5243 this = self.expression(exp.Tuple, expressions=expressions) 5244 else: 5245 this = self.expression(exp.Paren, this=this) 5246 5247 if this: 5248 this.add_comments(comments) 5249 5250 self._match_r_paren(expression=this) 5251 return this 5252 5253 return None 5254 5255 def _parse_field( 5256 self, 5257 any_token: bool = False, 5258 tokens: t.Optional[t.Collection[TokenType]] = None, 5259 anonymous_func: bool = False, 5260 ) -> t.Optional[exp.Expression]: 5261 if anonymous_func: 5262 field = ( 5263 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5264 or self._parse_primary() 5265 ) 5266 else: 5267 field = self._parse_primary() or self._parse_function( 5268 anonymous=anonymous_func, any_token=any_token 5269 ) 5270 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5271 5272 def _parse_function( 5273 self, 5274 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5275 anonymous: bool = False, 5276 optional_parens: bool = True, 5277 any_token: bool = False, 5278 ) -> t.Optional[exp.Expression]: 5279 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5280 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5281 fn_syntax = False 5282 if ( 5283 self._match(TokenType.L_BRACE, advance=False) 5284 and self._next 5285 and self._next.text.upper() == "FN" 5286 ): 5287 self._advance(2) 5288 fn_syntax = True 5289 5290 func = self._parse_function_call( 5291 functions=functions, 5292 anonymous=anonymous, 5293 optional_parens=optional_parens, 5294 any_token=any_token, 5295 ) 5296 5297 if fn_syntax: 5298 self._match(TokenType.R_BRACE) 5299 5300 return func 5301 5302 def _parse_function_call( 5303 self, 5304 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5305 anonymous: bool = False, 5306 optional_parens: bool = True, 5307 any_token: bool = False, 5308 ) -> t.Optional[exp.Expression]: 5309 if not self._curr: 5310 return None 5311 5312 comments = self._curr.comments 5313 token_type = self._curr.token_type 5314 this = self._curr.text 5315 upper = this.upper() 5316 5317 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5318 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5319 self._advance() 5320 return self._parse_window(parser(self)) 5321 5322 if not self._next or self._next.token_type != TokenType.L_PAREN: 5323 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5324 self._advance() 5325 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5326 5327 return None 5328 5329 if any_token: 5330 if token_type in self.RESERVED_TOKENS: 5331 return None 5332 elif token_type not in self.FUNC_TOKENS: 5333 return None 5334 5335 self._advance(2) 5336 5337 parser = self.FUNCTION_PARSERS.get(upper) 5338 if parser and not anonymous: 5339 this = parser(self) 
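# Names registered in FUNCTION_PARSERS (e.g. EXTRACT, TRIM, STRING_AGG, parsed by the dedicated methods below) take this branch; everything else falls through to the generic NAME(arg, ...) handling that follows.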
5340 else: 5341 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5342 5343 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5344 this = self.expression( 5345 subquery_predicate, comments=comments, this=self._parse_select() 5346 ) 5347 self._match_r_paren() 5348 return this 5349 5350 if functions is None: 5351 functions = self.FUNCTIONS 5352 5353 function = functions.get(upper) 5354 5355 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5356 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5357 5358 if alias: 5359 args = self._kv_to_prop_eq(args) 5360 5361 if function and not anonymous: 5362 if "dialect" in function.__code__.co_varnames: 5363 func = function(args, dialect=self.dialect) 5364 else: 5365 func = function(args) 5366 5367 func = self.validate_expression(func, args) 5368 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5369 func.meta["name"] = this 5370 5371 this = func 5372 else: 5373 if token_type == TokenType.IDENTIFIER: 5374 this = exp.Identifier(this=this, quoted=True) 5375 this = self.expression(exp.Anonymous, this=this, expressions=args) 5376 5377 if isinstance(this, exp.Expression): 5378 this.add_comments(comments) 5379 5380 self._match_r_paren(this) 5381 return self._parse_window(this) 5382 5383 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5384 return expression 5385 5386 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5387 transformed = [] 5388 5389 for index, e in enumerate(expressions): 5390 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5391 if isinstance(e, exp.Alias): 5392 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5393 5394 if not isinstance(e, exp.PropertyEQ): 5395 e = self.expression( 5396 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5397 ) 5398 5399 if isinstance(e.this, exp.Column): 5400 e.this.replace(e.this.this) 5401 else: 5402 e = self._to_prop_eq(e, index) 5403 5404 transformed.append(e) 5405 5406 return transformed 5407 5408 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5409 return self._parse_statement() 5410 5411 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5412 return self._parse_column_def(self._parse_id_var()) 5413 5414 def _parse_user_defined_function( 5415 self, kind: t.Optional[TokenType] = None 5416 ) -> t.Optional[exp.Expression]: 5417 this = self._parse_id_var() 5418 5419 while self._match(TokenType.DOT): 5420 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5421 5422 if not self._match(TokenType.L_PAREN): 5423 return this 5424 5425 expressions = self._parse_csv(self._parse_function_parameter) 5426 self._match_r_paren() 5427 return self.expression( 5428 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5429 ) 5430 5431 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5432 literal = self._parse_primary() 5433 if literal: 5434 return self.expression(exp.Introducer, this=token.text, expression=literal) 5435 5436 return self.expression(exp.Identifier, this=token.text) 5437 5438 def _parse_session_parameter(self) -> exp.SessionParameter: 5439 kind = None 5440 this = self._parse_id_var() or self._parse_primary() 5441 5442 if this and self._match(TokenType.DOT): 5443 kind = this.name 5444 this = self._parse_var() or self._parse_primary() 5445 5446 return self.expression(exp.SessionParameter, this=this, kind=kind) 
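# Lambda arguments appear in higher-order functions, e.g. the second argument of TRANSFORM(arr, x -> x + 1). _parse_lambda below first tries the `arg -> body` / `(a, b) -> body` forms by matching self.LAMBDAS after the argument list; if no lambda operator follows, it backtracks and parses a regular (possibly DISTINCT / ordered / limited) expression instead.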
5447 5448 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5449 return self._parse_id_var() 5450 5451 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5452 index = self._index 5453 5454 if self._match(TokenType.L_PAREN): 5455 expressions = t.cast( 5456 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5457 ) 5458 5459 if not self._match(TokenType.R_PAREN): 5460 self._retreat(index) 5461 else: 5462 expressions = [self._parse_lambda_arg()] 5463 5464 if self._match_set(self.LAMBDAS): 5465 return self.LAMBDAS[self._prev.token_type](self, expressions) 5466 5467 self._retreat(index) 5468 5469 this: t.Optional[exp.Expression] 5470 5471 if self._match(TokenType.DISTINCT): 5472 this = self.expression( 5473 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5474 ) 5475 else: 5476 this = self._parse_select_or_expression(alias=alias) 5477 5478 return self._parse_limit( 5479 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5480 ) 5481 5482 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5483 index = self._index 5484 if not self._match(TokenType.L_PAREN): 5485 return this 5486 5487 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5488 # expr can be of both types 5489 if self._match_set(self.SELECT_START_TOKENS): 5490 self._retreat(index) 5491 return this 5492 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5493 self._match_r_paren() 5494 return self.expression(exp.Schema, this=this, expressions=args) 5495 5496 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5497 return self._parse_column_def(self._parse_field(any_token=True)) 5498 5499 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5500 # column defs are not really columns, they're identifiers 5501 if isinstance(this, exp.Column): 5502 this = this.this 5503 5504 kind = self._parse_types(schema=True) 5505 5506 if self._match_text_seq("FOR", "ORDINALITY"): 5507 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5508 5509 constraints: t.List[exp.Expression] = [] 5510 5511 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5512 ("ALIAS", "MATERIALIZED") 5513 ): 5514 persisted = self._prev.text.upper() == "MATERIALIZED" 5515 constraint_kind = exp.ComputedColumnConstraint( 5516 this=self._parse_assignment(), 5517 persisted=persisted or self._match_text_seq("PERSISTED"), 5518 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5519 ) 5520 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5521 elif ( 5522 kind 5523 and self._match(TokenType.ALIAS, advance=False) 5524 and ( 5525 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5526 or (self._next and self._next.token_type == TokenType.L_PAREN) 5527 ) 5528 ): 5529 self._advance() 5530 constraints.append( 5531 self.expression( 5532 exp.ColumnConstraint, 5533 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5534 ) 5535 ) 5536 5537 while True: 5538 constraint = self._parse_column_constraint() 5539 if not constraint: 5540 break 5541 constraints.append(constraint) 5542 5543 if not kind and not constraints: 5544 return this 5545 5546 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5547 5548 def _parse_auto_increment( 5549 self, 5550 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 
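# Accepts both the wrapped form AUTO_INCREMENT(start, increment) and the keyword form START <expr> INCREMENT <expr>; when both values are present this yields a GeneratedAsIdentityColumnConstraint, otherwise a bare AutoIncrementColumnConstraint.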
5551 start = None 5552 increment = None 5553 5554 if self._match(TokenType.L_PAREN, advance=False): 5555 args = self._parse_wrapped_csv(self._parse_bitwise) 5556 start = seq_get(args, 0) 5557 increment = seq_get(args, 1) 5558 elif self._match_text_seq("START"): 5559 start = self._parse_bitwise() 5560 self._match_text_seq("INCREMENT") 5561 increment = self._parse_bitwise() 5562 5563 if start and increment: 5564 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5565 5566 return exp.AutoIncrementColumnConstraint() 5567 5568 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5569 if not self._match_text_seq("REFRESH"): 5570 self._retreat(self._index - 1) 5571 return None 5572 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5573 5574 def _parse_compress(self) -> exp.CompressColumnConstraint: 5575 if self._match(TokenType.L_PAREN, advance=False): 5576 return self.expression( 5577 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5578 ) 5579 5580 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5581 5582 def _parse_generated_as_identity( 5583 self, 5584 ) -> ( 5585 exp.GeneratedAsIdentityColumnConstraint 5586 | exp.ComputedColumnConstraint 5587 | exp.GeneratedAsRowColumnConstraint 5588 ): 5589 if self._match_text_seq("BY", "DEFAULT"): 5590 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5591 this = self.expression( 5592 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5593 ) 5594 else: 5595 self._match_text_seq("ALWAYS") 5596 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5597 5598 self._match(TokenType.ALIAS) 5599 5600 if self._match_text_seq("ROW"): 5601 start = self._match_text_seq("START") 5602 if not start: 5603 self._match(TokenType.END) 5604 hidden = self._match_text_seq("HIDDEN") 5605 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5606 5607 identity = self._match_text_seq("IDENTITY") 5608 5609 if self._match(TokenType.L_PAREN): 5610 if self._match(TokenType.START_WITH): 5611 this.set("start", self._parse_bitwise()) 5612 if self._match_text_seq("INCREMENT", "BY"): 5613 this.set("increment", self._parse_bitwise()) 5614 if self._match_text_seq("MINVALUE"): 5615 this.set("minvalue", self._parse_bitwise()) 5616 if self._match_text_seq("MAXVALUE"): 5617 this.set("maxvalue", self._parse_bitwise()) 5618 5619 if self._match_text_seq("CYCLE"): 5620 this.set("cycle", True) 5621 elif self._match_text_seq("NO", "CYCLE"): 5622 this.set("cycle", False) 5623 5624 if not identity: 5625 this.set("expression", self._parse_range()) 5626 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5627 args = self._parse_csv(self._parse_bitwise) 5628 this.set("start", seq_get(args, 0)) 5629 this.set("increment", seq_get(args, 1)) 5630 5631 self._match_r_paren() 5632 5633 return this 5634 5635 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5636 self._match_text_seq("LENGTH") 5637 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5638 5639 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5640 if self._match_text_seq("NULL"): 5641 return self.expression(exp.NotNullColumnConstraint) 5642 if self._match_text_seq("CASESPECIFIC"): 5643 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5644 if self._match_text_seq("FOR", "REPLICATION"): 5645 return 
self.expression(exp.NotForReplicationColumnConstraint) 5646 5647 # Unconsume the `NOT` token 5648 self._retreat(self._index - 1) 5649 return None 5650 5651 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5652 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5653 5654 procedure_option_follows = ( 5655 self._match(TokenType.WITH, advance=False) 5656 and self._next 5657 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5658 ) 5659 5660 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5661 return self.expression( 5662 exp.ColumnConstraint, 5663 this=this, 5664 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5665 ) 5666 5667 return this 5668 5669 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5670 if not self._match(TokenType.CONSTRAINT): 5671 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5672 5673 return self.expression( 5674 exp.Constraint, 5675 this=self._parse_id_var(), 5676 expressions=self._parse_unnamed_constraints(), 5677 ) 5678 5679 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5680 constraints = [] 5681 while True: 5682 constraint = self._parse_unnamed_constraint() or self._parse_function() 5683 if not constraint: 5684 break 5685 constraints.append(constraint) 5686 5687 return constraints 5688 5689 def _parse_unnamed_constraint( 5690 self, constraints: t.Optional[t.Collection[str]] = None 5691 ) -> t.Optional[exp.Expression]: 5692 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5693 constraints or self.CONSTRAINT_PARSERS 5694 ): 5695 return None 5696 5697 constraint = self._prev.text.upper() 5698 if constraint not in self.CONSTRAINT_PARSERS: 5699 self.raise_error(f"No parser found for schema constraint {constraint}.") 5700 5701 return self.CONSTRAINT_PARSERS[constraint](self) 5702 5703 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5704 return self._parse_id_var(any_token=False) 5705 5706 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5707 self._match_text_seq("KEY") 5708 return self.expression( 5709 exp.UniqueColumnConstraint, 5710 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5711 this=self._parse_schema(self._parse_unique_key()), 5712 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5713 on_conflict=self._parse_on_conflict(), 5714 ) 5715 5716 def _parse_key_constraint_options(self) -> t.List[str]: 5717 options = [] 5718 while True: 5719 if not self._curr: 5720 break 5721 5722 if self._match(TokenType.ON): 5723 action = None 5724 on = self._advance_any() and self._prev.text 5725 5726 if self._match_text_seq("NO", "ACTION"): 5727 action = "NO ACTION" 5728 elif self._match_text_seq("CASCADE"): 5729 action = "CASCADE" 5730 elif self._match_text_seq("RESTRICT"): 5731 action = "RESTRICT" 5732 elif self._match_pair(TokenType.SET, TokenType.NULL): 5733 action = "SET NULL" 5734 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5735 action = "SET DEFAULT" 5736 else: 5737 self.raise_error("Invalid key constraint") 5738 5739 options.append(f"ON {on} {action}") 5740 else: 5741 var = self._parse_var_from_options( 5742 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5743 ) 5744 if not var: 5745 break 5746 options.append(var.name) 5747 5748 return options 5749 5750 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5751 if match and not self._match(TokenType.REFERENCES): 5752 return None 5753 5754 expressions = None 
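# The referenced table and its optional column list are parsed together via _parse_table(schema=True); trailing options such as ON DELETE CASCADE are collected as plain strings by _parse_key_constraint_options.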
5755 this = self._parse_table(schema=True) 5756 options = self._parse_key_constraint_options() 5757 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5758 5759 def _parse_foreign_key(self) -> exp.ForeignKey: 5760 expressions = self._parse_wrapped_id_vars() 5761 reference = self._parse_references() 5762 options = {} 5763 5764 while self._match(TokenType.ON): 5765 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5766 self.raise_error("Expected DELETE or UPDATE") 5767 5768 kind = self._prev.text.lower() 5769 5770 if self._match_text_seq("NO", "ACTION"): 5771 action = "NO ACTION" 5772 elif self._match(TokenType.SET): 5773 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5774 action = "SET " + self._prev.text.upper() 5775 else: 5776 self._advance() 5777 action = self._prev.text.upper() 5778 5779 options[kind] = action 5780 5781 return self.expression( 5782 exp.ForeignKey, 5783 expressions=expressions, 5784 reference=reference, 5785 **options, # type: ignore 5786 ) 5787 5788 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5789 return self._parse_field() 5790 5791 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5792 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5793 self._retreat(self._index - 1) 5794 return None 5795 5796 id_vars = self._parse_wrapped_id_vars() 5797 return self.expression( 5798 exp.PeriodForSystemTimeConstraint, 5799 this=seq_get(id_vars, 0), 5800 expression=seq_get(id_vars, 1), 5801 ) 5802 5803 def _parse_primary_key( 5804 self, wrapped_optional: bool = False, in_props: bool = False 5805 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5806 desc = ( 5807 self._match_set((TokenType.ASC, TokenType.DESC)) 5808 and self._prev.token_type == TokenType.DESC 5809 ) 5810 5811 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5812 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5813 5814 expressions = self._parse_wrapped_csv( 5815 self._parse_primary_key_part, optional=wrapped_optional 5816 ) 5817 options = self._parse_key_constraint_options() 5818 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5819 5820 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5821 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5822 5823 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5824 """ 5825 Parses a datetime column in ODBC format. We parse the column into the corresponding 5826 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5827 same as we did for `DATE('yyyy-mm-dd')`. 
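The other ODBC prefixes, `{t'hh:mm:ss'}` and `{ts'yyyy-mm-dd hh:mm:ss'}`, are resolved the same way through `ODBC_DATETIME_LITERALS`.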
5828 5829 Reference: 5830 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5831 """ 5832 self._match(TokenType.VAR) 5833 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5834 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5835 if not self._match(TokenType.R_BRACE): 5836 self.raise_error("Expected }") 5837 return expression 5838 5839 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5840 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5841 return this 5842 5843 bracket_kind = self._prev.token_type 5844 if ( 5845 bracket_kind == TokenType.L_BRACE 5846 and self._curr 5847 and self._curr.token_type == TokenType.VAR 5848 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5849 ): 5850 return self._parse_odbc_datetime_literal() 5851 5852 expressions = self._parse_csv( 5853 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5854 ) 5855 5856 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5857 self.raise_error("Expected ]") 5858 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5859 self.raise_error("Expected }") 5860 5861 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5862 if bracket_kind == TokenType.L_BRACE: 5863 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5864 elif not this: 5865 this = build_array_constructor( 5866 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5867 ) 5868 else: 5869 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5870 if constructor_type: 5871 return build_array_constructor( 5872 constructor_type, 5873 args=expressions, 5874 bracket_kind=bracket_kind, 5875 dialect=self.dialect, 5876 ) 5877 5878 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5879 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5880 5881 self._add_comments(this) 5882 return self._parse_bracket(this) 5883 5884 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5885 if self._match(TokenType.COLON): 5886 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5887 return this 5888 5889 def _parse_case(self) -> t.Optional[exp.Expression]: 5890 ifs = [] 5891 default = None 5892 5893 comments = self._prev_comments 5894 expression = self._parse_assignment() 5895 5896 while self._match(TokenType.WHEN): 5897 this = self._parse_assignment() 5898 self._match(TokenType.THEN) 5899 then = self._parse_assignment() 5900 ifs.append(self.expression(exp.If, this=this, true=then)) 5901 5902 if self._match(TokenType.ELSE): 5903 default = self._parse_assignment() 5904 5905 if not self._match(TokenType.END): 5906 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5907 default = exp.column("interval") 5908 else: 5909 self.raise_error("Expected END after CASE", self._prev) 5910 5911 return self.expression( 5912 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5913 ) 5914 5915 def _parse_if(self) -> t.Optional[exp.Expression]: 5916 if self._match(TokenType.L_PAREN): 5917 args = self._parse_csv(self._parse_assignment) 5918 this = self.validate_expression(exp.If.from_arg_list(args), args) 5919 self._match_r_paren() 5920 else: 5921 index = self._index - 1 5922 5923 if self.NO_PAREN_IF_COMMANDS and index == 0: 5924 
return self._parse_as_command(self._prev) 5925 5926 condition = self._parse_assignment() 5927 5928 if not condition: 5929 self._retreat(index) 5930 return None 5931 5932 self._match(TokenType.THEN) 5933 true = self._parse_assignment() 5934 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5935 self._match(TokenType.END) 5936 this = self.expression(exp.If, this=condition, true=true, false=false) 5937 5938 return this 5939 5940 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5941 if not self._match_text_seq("VALUE", "FOR"): 5942 self._retreat(self._index - 1) 5943 return None 5944 5945 return self.expression( 5946 exp.NextValueFor, 5947 this=self._parse_column(), 5948 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5949 ) 5950 5951 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5952 this = self._parse_function() or self._parse_var_or_string(upper=True) 5953 5954 if self._match(TokenType.FROM): 5955 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5956 5957 if not self._match(TokenType.COMMA): 5958 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5959 5960 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5961 5962 def _parse_gap_fill(self) -> exp.GapFill: 5963 self._match(TokenType.TABLE) 5964 this = self._parse_table() 5965 5966 self._match(TokenType.COMMA) 5967 args = [this, *self._parse_csv(self._parse_lambda)] 5968 5969 gap_fill = exp.GapFill.from_arg_list(args) 5970 return self.validate_expression(gap_fill, args) 5971 5972 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5973 this = self._parse_assignment() 5974 5975 if not self._match(TokenType.ALIAS): 5976 if self._match(TokenType.COMMA): 5977 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5978 5979 self.raise_error("Expected AS after CAST") 5980 5981 fmt = None 5982 to = self._parse_types() 5983 5984 if self._match(TokenType.FORMAT): 5985 fmt_string = self._parse_string() 5986 fmt = self._parse_at_time_zone(fmt_string) 5987 5988 if not to: 5989 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5990 if to.this in exp.DataType.TEMPORAL_TYPES: 5991 this = self.expression( 5992 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5993 this=this, 5994 format=exp.Literal.string( 5995 format_time( 5996 fmt_string.this if fmt_string else "", 5997 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5998 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5999 ) 6000 ), 6001 safe=safe, 6002 ) 6003 6004 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6005 this.set("zone", fmt.args["zone"]) 6006 return this 6007 elif not to: 6008 self.raise_error("Expected TYPE after CAST") 6009 elif isinstance(to, exp.Identifier): 6010 to = exp.DataType.build(to.name, udt=True) 6011 elif to.this == exp.DataType.Type.CHAR: 6012 if self._match(TokenType.CHARACTER_SET): 6013 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6014 6015 return self.expression( 6016 exp.Cast if strict else exp.TryCast, 6017 this=this, 6018 to=to, 6019 format=fmt, 6020 safe=safe, 6021 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6022 ) 6023 6024 def _parse_string_agg(self) -> exp.GroupConcat: 6025 if self._match(TokenType.DISTINCT): 6026 args: t.List[t.Optional[exp.Expression]] = [ 6027 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
6028 ] 6029 if self._match(TokenType.COMMA): 6030 args.extend(self._parse_csv(self._parse_assignment)) 6031 else: 6032 args = self._parse_csv(self._parse_assignment) # type: ignore 6033 6034 if self._match_text_seq("ON", "OVERFLOW"): 6035 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6036 if self._match_text_seq("ERROR"): 6037 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6038 else: 6039 self._match_text_seq("TRUNCATE") 6040 on_overflow = self.expression( 6041 exp.OverflowTruncateBehavior, 6042 this=self._parse_string(), 6043 with_count=( 6044 self._match_text_seq("WITH", "COUNT") 6045 or not self._match_text_seq("WITHOUT", "COUNT") 6046 ), 6047 ) 6048 else: 6049 on_overflow = None 6050 6051 index = self._index 6052 if not self._match(TokenType.R_PAREN) and args: 6053 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6054 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6055 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6056 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6057 6058 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6059 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6060 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6061 if not self._match_text_seq("WITHIN", "GROUP"): 6062 self._retreat(index) 6063 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6064 6065 # The corresponding match_r_paren will be called in parse_function (caller) 6066 self._match_l_paren() 6067 6068 return self.expression( 6069 exp.GroupConcat, 6070 this=self._parse_order(this=seq_get(args, 0)), 6071 separator=seq_get(args, 1), 6072 on_overflow=on_overflow, 6073 ) 6074 6075 def _parse_convert( 6076 self, strict: bool, safe: t.Optional[bool] = None 6077 ) -> t.Optional[exp.Expression]: 6078 this = self._parse_bitwise() 6079 6080 if self._match(TokenType.USING): 6081 to: t.Optional[exp.Expression] = self.expression( 6082 exp.CharacterSet, this=self._parse_var() 6083 ) 6084 elif self._match(TokenType.COMMA): 6085 to = self._parse_types() 6086 else: 6087 to = None 6088 6089 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6090 6091 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6092 """ 6093 There are generally two variants of the DECODE function: 6094 6095 - DECODE(bin, charset) 6096 - DECODE(expression, search, result [, search, result] ... [, default]) 6097 6098 The second variant will always be parsed into a CASE expression. Note that NULL 6099 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6100 instead of relying on pattern matching. 
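For example, `DECODE(x, 1, 'one', NULL, 'none', 'other')` is parsed into `CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END`.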
6101 """ 6102 args = self._parse_csv(self._parse_assignment) 6103 6104 if len(args) < 3: 6105 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6106 6107 expression, *expressions = args 6108 if not expression: 6109 return None 6110 6111 ifs = [] 6112 for search, result in zip(expressions[::2], expressions[1::2]): 6113 if not search or not result: 6114 return None 6115 6116 if isinstance(search, exp.Literal): 6117 ifs.append( 6118 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6119 ) 6120 elif isinstance(search, exp.Null): 6121 ifs.append( 6122 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6123 ) 6124 else: 6125 cond = exp.or_( 6126 exp.EQ(this=expression.copy(), expression=search), 6127 exp.and_( 6128 exp.Is(this=expression.copy(), expression=exp.Null()), 6129 exp.Is(this=search.copy(), expression=exp.Null()), 6130 copy=False, 6131 ), 6132 copy=False, 6133 ) 6134 ifs.append(exp.If(this=cond, true=result)) 6135 6136 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6137 6138 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6139 self._match_text_seq("KEY") 6140 key = self._parse_column() 6141 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6142 self._match_text_seq("VALUE") 6143 value = self._parse_bitwise() 6144 6145 if not key and not value: 6146 return None 6147 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6148 6149 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6150 if not this or not self._match_text_seq("FORMAT", "JSON"): 6151 return this 6152 6153 return self.expression(exp.FormatJson, this=this) 6154 6155 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6156 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6157 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6158 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6159 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6160 else: 6161 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6162 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6163 6164 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6165 6166 if not empty and not error and not null: 6167 return None 6168 6169 return self.expression( 6170 exp.OnCondition, 6171 empty=empty, 6172 error=error, 6173 null=null, 6174 ) 6175 6176 def _parse_on_handling( 6177 self, on: str, *values: str 6178 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6179 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6180 for value in values: 6181 if self._match_text_seq(value, "ON", on): 6182 return f"{value} ON {on}" 6183 6184 index = self._index 6185 if self._match(TokenType.DEFAULT): 6186 default_value = self._parse_bitwise() 6187 if self._match_text_seq("ON", on): 6188 return default_value 6189 6190 self._retreat(index) 6191 6192 return None 6193 6194 @t.overload 6195 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6196 6197 @t.overload 6198 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
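# The overloads above only narrow the return type for type checkers; the shared implementation below builds exp.JSONObjectAgg when agg is True and exp.JSONObject otherwise.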
6199 6200 def _parse_json_object(self, agg=False): 6201 star = self._parse_star() 6202 expressions = ( 6203 [star] 6204 if star 6205 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6206 ) 6207 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6208 6209 unique_keys = None 6210 if self._match_text_seq("WITH", "UNIQUE"): 6211 unique_keys = True 6212 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6213 unique_keys = False 6214 6215 self._match_text_seq("KEYS") 6216 6217 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6218 self._parse_type() 6219 ) 6220 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6221 6222 return self.expression( 6223 exp.JSONObjectAgg if agg else exp.JSONObject, 6224 expressions=expressions, 6225 null_handling=null_handling, 6226 unique_keys=unique_keys, 6227 return_type=return_type, 6228 encoding=encoding, 6229 ) 6230 6231 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6232 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6233 if not self._match_text_seq("NESTED"): 6234 this = self._parse_id_var() 6235 kind = self._parse_types(allow_identifiers=False) 6236 nested = None 6237 else: 6238 this = None 6239 kind = None 6240 nested = True 6241 6242 path = self._match_text_seq("PATH") and self._parse_string() 6243 nested_schema = nested and self._parse_json_schema() 6244 6245 return self.expression( 6246 exp.JSONColumnDef, 6247 this=this, 6248 kind=kind, 6249 path=path, 6250 nested_schema=nested_schema, 6251 ) 6252 6253 def _parse_json_schema(self) -> exp.JSONSchema: 6254 self._match_text_seq("COLUMNS") 6255 return self.expression( 6256 exp.JSONSchema, 6257 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6258 ) 6259 6260 def _parse_json_table(self) -> exp.JSONTable: 6261 this = self._parse_format_json(self._parse_bitwise()) 6262 path = self._match(TokenType.COMMA) and self._parse_string() 6263 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6264 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6265 schema = self._parse_json_schema() 6266 6267 return exp.JSONTable( 6268 this=this, 6269 schema=schema, 6270 path=path, 6271 error_handling=error_handling, 6272 empty_handling=empty_handling, 6273 ) 6274 6275 def _parse_match_against(self) -> exp.MatchAgainst: 6276 expressions = self._parse_csv(self._parse_column) 6277 6278 self._match_text_seq(")", "AGAINST", "(") 6279 6280 this = self._parse_string() 6281 6282 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6283 modifier = "IN NATURAL LANGUAGE MODE" 6284 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6285 modifier = f"{modifier} WITH QUERY EXPANSION" 6286 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6287 modifier = "IN BOOLEAN MODE" 6288 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6289 modifier = "WITH QUERY EXPANSION" 6290 else: 6291 modifier = None 6292 6293 return self.expression( 6294 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6295 ) 6296 6297 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6298 def _parse_open_json(self) -> exp.OpenJSON: 6299 this = self._parse_bitwise() 6300 path = self._match(TokenType.COMMA) and self._parse_string() 6301 6302 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6303 this = self._parse_field(any_token=True) 6304 kind = self._parse_types() 6305 path = 
self._parse_string() 6306 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6307 6308 return self.expression( 6309 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6310 ) 6311 6312 expressions = None 6313 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6314 self._match_l_paren() 6315 expressions = self._parse_csv(_parse_open_json_column_def) 6316 6317 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6318 6319 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6320 args = self._parse_csv(self._parse_bitwise) 6321 6322 if self._match(TokenType.IN): 6323 return self.expression( 6324 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6325 ) 6326 6327 if haystack_first: 6328 haystack = seq_get(args, 0) 6329 needle = seq_get(args, 1) 6330 else: 6331 needle = seq_get(args, 0) 6332 haystack = seq_get(args, 1) 6333 6334 return self.expression( 6335 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6336 ) 6337 6338 def _parse_predict(self) -> exp.Predict: 6339 self._match_text_seq("MODEL") 6340 this = self._parse_table() 6341 6342 self._match(TokenType.COMMA) 6343 self._match_text_seq("TABLE") 6344 6345 return self.expression( 6346 exp.Predict, 6347 this=this, 6348 expression=self._parse_table(), 6349 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6350 ) 6351 6352 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6353 args = self._parse_csv(self._parse_table) 6354 return exp.JoinHint(this=func_name.upper(), expressions=args) 6355 6356 def _parse_substring(self) -> exp.Substring: 6357 # Postgres supports the form: substring(string [from int] [for int]) 6358 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6359 6360 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6361 6362 if self._match(TokenType.FROM): 6363 args.append(self._parse_bitwise()) 6364 if self._match(TokenType.FOR): 6365 if len(args) == 1: 6366 args.append(exp.Literal.number(1)) 6367 args.append(self._parse_bitwise()) 6368 6369 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6370 6371 def _parse_trim(self) -> exp.Trim: 6372 # https://www.w3resource.com/sql/character-functions/trim.php 6373 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6374 6375 position = None 6376 collation = None 6377 expression = None 6378 6379 if self._match_texts(self.TRIM_TYPES): 6380 position = self._prev.text.upper() 6381 6382 this = self._parse_bitwise() 6383 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6384 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6385 expression = self._parse_bitwise() 6386 6387 if invert_order: 6388 this, expression = expression, this 6389 6390 if self._match(TokenType.COLLATE): 6391 collation = self._parse_bitwise() 6392 6393 return self.expression( 6394 exp.Trim, this=this, position=position, expression=expression, collation=collation 6395 ) 6396 6397 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6398 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6399 6400 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6401 return self._parse_window(self._parse_id_var(), alias=True) 6402 6403 def _parse_respect_or_ignore_nulls( 6404 self, this: t.Optional[exp.Expression] 6405 ) -> t.Optional[exp.Expression]: 6406 if self._match_text_seq("IGNORE", "NULLS"): 
6407 return self.expression(exp.IgnoreNulls, this=this) 6408 if self._match_text_seq("RESPECT", "NULLS"): 6409 return self.expression(exp.RespectNulls, this=this) 6410 return this 6411 6412 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6413 if self._match(TokenType.HAVING): 6414 self._match_texts(("MAX", "MIN")) 6415 max = self._prev.text.upper() != "MIN" 6416 return self.expression( 6417 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6418 ) 6419 6420 return this 6421 6422 def _parse_window( 6423 self, this: t.Optional[exp.Expression], alias: bool = False 6424 ) -> t.Optional[exp.Expression]: 6425 func = this 6426 comments = func.comments if isinstance(func, exp.Expression) else None 6427 6428 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6429 self._match(TokenType.WHERE) 6430 this = self.expression( 6431 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6432 ) 6433 self._match_r_paren() 6434 6435 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6436 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6437 if self._match_text_seq("WITHIN", "GROUP"): 6438 order = self._parse_wrapped(self._parse_order) 6439 this = self.expression(exp.WithinGroup, this=this, expression=order) 6440 6441 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6442 # Some dialects choose to implement and some do not. 6443 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6444 6445 # There is some code above in _parse_lambda that handles 6446 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6447 6448 # The below changes handle 6449 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6450 6451 # Oracle allows both formats 6452 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6453 # and Snowflake chose to do the same for familiarity 6454 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6455 if isinstance(this, exp.AggFunc): 6456 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6457 6458 if ignore_respect and ignore_respect is not this: 6459 ignore_respect.replace(ignore_respect.this) 6460 this = self.expression(ignore_respect.__class__, this=this) 6461 6462 this = self._parse_respect_or_ignore_nulls(this) 6463 6464 # bigquery select from window x AS (partition by ...) 
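# e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y): with alias=True the AS keyword is consumed here and `over` stays None, since no OVER token precedes the window spec.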
6465 if alias: 6466 over = None 6467 self._match(TokenType.ALIAS) 6468 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6469 return this 6470 else: 6471 over = self._prev.text.upper() 6472 6473 if comments and isinstance(func, exp.Expression): 6474 func.pop_comments() 6475 6476 if not self._match(TokenType.L_PAREN): 6477 return self.expression( 6478 exp.Window, 6479 comments=comments, 6480 this=this, 6481 alias=self._parse_id_var(False), 6482 over=over, 6483 ) 6484 6485 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6486 6487 first = self._match(TokenType.FIRST) 6488 if self._match_text_seq("LAST"): 6489 first = False 6490 6491 partition, order = self._parse_partition_and_order() 6492 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6493 6494 if kind: 6495 self._match(TokenType.BETWEEN) 6496 start = self._parse_window_spec() 6497 self._match(TokenType.AND) 6498 end = self._parse_window_spec() 6499 6500 spec = self.expression( 6501 exp.WindowSpec, 6502 kind=kind, 6503 start=start["value"], 6504 start_side=start["side"], 6505 end=end["value"], 6506 end_side=end["side"], 6507 ) 6508 else: 6509 spec = None 6510 6511 self._match_r_paren() 6512 6513 window = self.expression( 6514 exp.Window, 6515 comments=comments, 6516 this=this, 6517 partition_by=partition, 6518 order=order, 6519 spec=spec, 6520 alias=window_alias, 6521 over=over, 6522 first=first, 6523 ) 6524 6525 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6526 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6527 return self._parse_window(window, alias=alias) 6528 6529 return window 6530 6531 def _parse_partition_and_order( 6532 self, 6533 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6534 return self._parse_partition_by(), self._parse_order() 6535 6536 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6537 self._match(TokenType.BETWEEN) 6538 6539 return { 6540 "value": ( 6541 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6542 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6543 or self._parse_bitwise() 6544 ), 6545 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6546 } 6547 6548 def _parse_alias( 6549 self, this: t.Optional[exp.Expression], explicit: bool = False 6550 ) -> t.Optional[exp.Expression]: 6551 any_token = self._match(TokenType.ALIAS) 6552 comments = self._prev_comments or [] 6553 6554 if explicit and not any_token: 6555 return this 6556 6557 if self._match(TokenType.L_PAREN): 6558 aliases = self.expression( 6559 exp.Aliases, 6560 comments=comments, 6561 this=this, 6562 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6563 ) 6564 self._match_r_paren(aliases) 6565 return aliases 6566 6567 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6568 self.STRING_ALIASES and self._parse_string_as_identifier() 6569 ) 6570 6571 if alias: 6572 comments.extend(alias.pop_comments()) 6573 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6574 column = this.this 6575 6576 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6577 if not this.comments and column and column.comments: 6578 this.comments = column.pop_comments() 6579 6580 return this 6581 6582 def _parse_id_var( 6583 self, 6584 any_token: bool = True, 6585 tokens: t.Optional[t.Collection[TokenType]] = None, 6586 ) -> t.Optional[exp.Expression]: 6587 expression = self._parse_identifier() 6588 if 
not expression and ( 6589 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6590 ): 6591 quoted = self._prev.token_type == TokenType.STRING 6592 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6593 6594 return expression 6595 6596 def _parse_string(self) -> t.Optional[exp.Expression]: 6597 if self._match_set(self.STRING_PARSERS): 6598 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6599 return self._parse_placeholder() 6600 6601 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6602 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6603 6604 def _parse_number(self) -> t.Optional[exp.Expression]: 6605 if self._match_set(self.NUMERIC_PARSERS): 6606 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6607 return self._parse_placeholder() 6608 6609 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6610 if self._match(TokenType.IDENTIFIER): 6611 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6612 return self._parse_placeholder() 6613 6614 def _parse_var( 6615 self, 6616 any_token: bool = False, 6617 tokens: t.Optional[t.Collection[TokenType]] = None, 6618 upper: bool = False, 6619 ) -> t.Optional[exp.Expression]: 6620 if ( 6621 (any_token and self._advance_any()) 6622 or self._match(TokenType.VAR) 6623 or (self._match_set(tokens) if tokens else False) 6624 ): 6625 return self.expression( 6626 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6627 ) 6628 return self._parse_placeholder() 6629 6630 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6631 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6632 self._advance() 6633 return self._prev 6634 return None 6635 6636 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6637 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6638 6639 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6640 return self._parse_primary() or self._parse_var(any_token=True) 6641 6642 def _parse_null(self) -> t.Optional[exp.Expression]: 6643 if self._match_set(self.NULL_TOKENS): 6644 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6645 return self._parse_placeholder() 6646 6647 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6648 if self._match(TokenType.TRUE): 6649 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6650 if self._match(TokenType.FALSE): 6651 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6652 return self._parse_placeholder() 6653 6654 def _parse_star(self) -> t.Optional[exp.Expression]: 6655 if self._match(TokenType.STAR): 6656 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6657 return self._parse_placeholder() 6658 6659 def _parse_parameter(self) -> exp.Parameter: 6660 this = self._parse_identifier() or self._parse_primary_or_var() 6661 return self.expression(exp.Parameter, this=this) 6662 6663 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6664 if self._match_set(self.PLACEHOLDER_PARSERS): 6665 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6666 if placeholder: 6667 return placeholder 6668 self._advance(-1) 6669 return None 6670 6671 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6672 if not self._match_texts(keywords): 6673 return None 6674 if self._match(TokenType.L_PAREN, 
advance=False): 6675 return self._parse_wrapped_csv(self._parse_expression) 6676 6677 expression = self._parse_expression() 6678 return [expression] if expression else None 6679 6680 def _parse_csv( 6681 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6682 ) -> t.List[exp.Expression]: 6683 parse_result = parse_method() 6684 items = [parse_result] if parse_result is not None else [] 6685 6686 while self._match(sep): 6687 self._add_comments(parse_result) 6688 parse_result = parse_method() 6689 if parse_result is not None: 6690 items.append(parse_result) 6691 6692 return items 6693 6694 def _parse_tokens( 6695 self, parse_method: t.Callable, expressions: t.Dict 6696 ) -> t.Optional[exp.Expression]: 6697 this = parse_method() 6698 6699 while self._match_set(expressions): 6700 this = self.expression( 6701 expressions[self._prev.token_type], 6702 this=this, 6703 comments=self._prev_comments, 6704 expression=parse_method(), 6705 ) 6706 6707 return this 6708 6709 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6710 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6711 6712 def _parse_wrapped_csv( 6713 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6714 ) -> t.List[exp.Expression]: 6715 return self._parse_wrapped( 6716 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6717 ) 6718 6719 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6720 wrapped = self._match(TokenType.L_PAREN) 6721 if not wrapped and not optional: 6722 self.raise_error("Expecting (") 6723 parse_result = parse_method() 6724 if wrapped: 6725 self._match_r_paren() 6726 return parse_result 6727 6728 def _parse_expressions(self) -> t.List[exp.Expression]: 6729 return self._parse_csv(self._parse_expression) 6730 6731 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6732 return self._parse_select() or self._parse_set_operations( 6733 self._parse_expression() if alias else self._parse_assignment() 6734 ) 6735 6736 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6737 return self._parse_query_modifiers( 6738 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6739 ) 6740 6741 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6742 this = None 6743 if self._match_texts(self.TRANSACTION_KIND): 6744 this = self._prev.text 6745 6746 self._match_texts(("TRANSACTION", "WORK")) 6747 6748 modes = [] 6749 while True: 6750 mode = [] 6751 while self._match(TokenType.VAR): 6752 mode.append(self._prev.text) 6753 6754 if mode: 6755 modes.append(" ".join(mode)) 6756 if not self._match(TokenType.COMMA): 6757 break 6758 6759 return self.expression(exp.Transaction, this=this, modes=modes) 6760 6761 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6762 chain = None 6763 savepoint = None 6764 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6765 6766 self._match_texts(("TRANSACTION", "WORK")) 6767 6768 if self._match_text_seq("TO"): 6769 self._match_text_seq("SAVEPOINT") 6770 savepoint = self._parse_id_var() 6771 6772 if self._match(TokenType.AND): 6773 chain = not self._match_text_seq("NO") 6774 self._match_text_seq("CHAIN") 6775 6776 if is_rollback: 6777 return self.expression(exp.Rollback, savepoint=savepoint) 6778 6779 return self.expression(exp.Commit, chain=chain) 6780 6781 def _parse_refresh(self) -> exp.Refresh: 6782 self._match(TokenType.TABLE) 6783 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6784 6785 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6786 if not self._match_text_seq("ADD"): 6787 return None 6788 6789 self._match(TokenType.COLUMN) 6790 exists_column = self._parse_exists(not_=True) 6791 expression = self._parse_field_def() 6792 6793 if expression: 6794 expression.set("exists", exists_column) 6795 6796 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6797 if self._match_texts(("FIRST", "AFTER")): 6798 position = self._prev.text 6799 column_position = self.expression( 6800 exp.ColumnPosition, this=self._parse_column(), position=position 6801 ) 6802 expression.set("position", column_position) 6803 6804 return expression 6805 6806 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6807 drop = self._match(TokenType.DROP) and self._parse_drop() 6808 if drop and not isinstance(drop, exp.Command): 6809 drop.set("kind", drop.args.get("kind", "COLUMN")) 6810 return drop 6811 6812 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6813 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6814 return self.expression( 6815 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6816 ) 6817 6818 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6819 index = self._index - 1 6820 6821 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6822 return self._parse_csv( 6823 lambda: self.expression( 6824 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6825 ) 6826 ) 6827 6828 self._retreat(index) 6829 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6830 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6831 6832 if self._match_text_seq("ADD", "COLUMNS"): 6833 schema = self._parse_schema() 6834 if schema: 6835 return [schema] 6836 return [] 6837 6838 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6839 6840 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6841 if self._match_texts(self.ALTER_ALTER_PARSERS): 6842 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6843 6844 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6845 # keyword after ALTER we default to parsing this statement 6846 self._match(TokenType.COLUMN) 6847 column = self._parse_field(any_token=True) 6848 6849 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6850 return self.expression(exp.AlterColumn, this=column, drop=True) 6851 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6852 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6853 if self._match(TokenType.COMMENT): 6854 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6855 if self._match_text_seq("DROP", "NOT", "NULL"): 6856 return self.expression( 6857 exp.AlterColumn, 6858 this=column, 6859 drop=True, 6860 allow_null=True, 6861 ) 6862 if self._match_text_seq("SET", "NOT", "NULL"): 6863 return self.expression( 6864 exp.AlterColumn, 6865 this=column, 6866 allow_null=False, 6867 ) 6868 self._match_text_seq("SET", "DATA") 6869 self._match_text_seq("TYPE") 6870 return self.expression( 6871 exp.AlterColumn, 6872 this=column, 6873 dtype=self._parse_types(), 6874 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6875 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6876 ) 6877 6878 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6879 if self._match_texts(("ALL", "EVEN", "AUTO")): 6880 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6881 6882 self._match_text_seq("KEY", "DISTKEY") 6883 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6884 6885 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6886 if compound: 6887 self._match_text_seq("SORTKEY") 6888 6889 if self._match(TokenType.L_PAREN, advance=False): 6890 return self.expression( 6891 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6892 ) 6893 6894 self._match_texts(("AUTO", "NONE")) 6895 return self.expression( 6896 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6897 ) 6898 6899 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6900 index = self._index - 1 6901 6902 partition_exists = self._parse_exists() 6903 if self._match(TokenType.PARTITION, advance=False): 6904 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6905 6906 self._retreat(index) 6907 return self._parse_csv(self._parse_drop_column) 6908 6909 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6910 if self._match(TokenType.COLUMN): 6911 exists = self._parse_exists() 6912 old_column = self._parse_column() 6913 to = self._match_text_seq("TO") 6914 new_column = self._parse_column() 6915 6916 if old_column is None or to is None or new_column is None: 6917 return None 6918 6919 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6920 6921 self._match_text_seq("TO") 6922 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6923 6924 def _parse_alter_table_set(self) -> exp.AlterSet: 6925 alter_set = self.expression(exp.AlterSet) 6926 6927 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6928 "TABLE", "PROPERTIES" 6929 ): 6930 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6931 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6932 alter_set.set("expressions", [self._parse_assignment()]) 6933 elif self._match_texts(("LOGGED", "UNLOGGED")): 6934 alter_set.set("option", exp.var(self._prev.text.upper())) 6935 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6936 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6937 elif self._match_text_seq("LOCATION"): 6938 alter_set.set("location", self._parse_field()) 6939 elif self._match_text_seq("ACCESS", "METHOD"): 6940 alter_set.set("access_method", self._parse_field()) 6941 elif self._match_text_seq("TABLESPACE"): 6942 alter_set.set("tablespace", self._parse_field()) 6943 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6944 alter_set.set("file_format", [self._parse_field()]) 6945 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6946 alter_set.set("file_format", self._parse_wrapped_options()) 6947 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6948 alter_set.set("copy_options", self._parse_wrapped_options()) 6949 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6950 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6951 else: 6952 if self._match_text_seq("SERDE"): 6953 alter_set.set("serde", self._parse_field()) 6954 6955 alter_set.set("expressions", [self._parse_properties()]) 6956 6957 return 
alter_set 6958 6959 def _parse_alter(self) -> exp.Alter | exp.Command: 6960 start = self._prev 6961 6962 alter_token = self._match_set(self.ALTERABLES) and self._prev 6963 if not alter_token: 6964 return self._parse_as_command(start) 6965 6966 exists = self._parse_exists() 6967 only = self._match_text_seq("ONLY") 6968 this = self._parse_table(schema=True) 6969 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6970 6971 if self._next: 6972 self._advance() 6973 6974 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6975 if parser: 6976 actions = ensure_list(parser(self)) 6977 not_valid = self._match_text_seq("NOT", "VALID") 6978 options = self._parse_csv(self._parse_property) 6979 6980 if not self._curr and actions: 6981 return self.expression( 6982 exp.Alter, 6983 this=this, 6984 kind=alter_token.text.upper(), 6985 exists=exists, 6986 actions=actions, 6987 only=only, 6988 options=options, 6989 cluster=cluster, 6990 not_valid=not_valid, 6991 ) 6992 6993 return self._parse_as_command(start) 6994 6995 def _parse_merge(self) -> exp.Merge: 6996 self._match(TokenType.INTO) 6997 target = self._parse_table() 6998 6999 if target and self._match(TokenType.ALIAS, advance=False): 7000 target.set("alias", self._parse_table_alias()) 7001 7002 self._match(TokenType.USING) 7003 using = self._parse_table() 7004 7005 self._match(TokenType.ON) 7006 on = self._parse_assignment() 7007 7008 return self.expression( 7009 exp.Merge, 7010 this=target, 7011 using=using, 7012 on=on, 7013 whens=self._parse_when_matched(), 7014 returning=self._parse_returning(), 7015 ) 7016 7017 def _parse_when_matched(self) -> exp.Whens: 7018 whens = [] 7019 7020 while self._match(TokenType.WHEN): 7021 matched = not self._match(TokenType.NOT) 7022 self._match_text_seq("MATCHED") 7023 source = ( 7024 False 7025 if self._match_text_seq("BY", "TARGET") 7026 else self._match_text_seq("BY", "SOURCE") 7027 ) 7028 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7029 7030 self._match(TokenType.THEN) 7031 7032 if self._match(TokenType.INSERT): 7033 this = self._parse_star() 7034 if this: 7035 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7036 else: 7037 then = self.expression( 7038 exp.Insert, 7039 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7040 expression=self._match_text_seq("VALUES") and self._parse_value(), 7041 ) 7042 elif self._match(TokenType.UPDATE): 7043 expressions = self._parse_star() 7044 if expressions: 7045 then = self.expression(exp.Update, expressions=expressions) 7046 else: 7047 then = self.expression( 7048 exp.Update, 7049 expressions=self._match(TokenType.SET) 7050 and self._parse_csv(self._parse_equality), 7051 ) 7052 elif self._match(TokenType.DELETE): 7053 then = self.expression(exp.Var, this=self._prev.text) 7054 else: 7055 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7056 7057 whens.append( 7058 self.expression( 7059 exp.When, 7060 matched=matched, 7061 source=source, 7062 condition=condition, 7063 then=then, 7064 ) 7065 ) 7066 return self.expression(exp.Whens, expressions=whens) 7067 7068 def _parse_show(self) -> t.Optional[exp.Expression]: 7069 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7070 if parser: 7071 return parser(self) 7072 return self._parse_as_command(self._prev) 7073 7074 def _parse_set_item_assignment( 7075 self, kind: t.Optional[str] = None 7076 ) -> t.Optional[exp.Expression]: 7077 index = self._index 7078 7079 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7080 return self._parse_set_transaction(global_=kind == "GLOBAL") 7081 7082 left = self._parse_primary() or self._parse_column() 7083 assignment_delimiter = self._match_texts(("=", "TO")) 7084 7085 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7086 self._retreat(index) 7087 return None 7088 7089 right = self._parse_statement() or self._parse_id_var() 7090 if isinstance(right, (exp.Column, exp.Identifier)): 7091 right = exp.var(right.name) 7092 7093 this = self.expression(exp.EQ, this=left, expression=right) 7094 return self.expression(exp.SetItem, this=this, kind=kind) 7095 7096 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7097 self._match_text_seq("TRANSACTION") 7098 characteristics = self._parse_csv( 7099 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7100 ) 7101 return self.expression( 7102 exp.SetItem, 7103 expressions=characteristics, 7104 kind="TRANSACTION", 7105 **{"global": global_}, # type: ignore 7106 ) 7107 7108 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7109 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7110 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7111 7112 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7113 index = self._index 7114 set_ = self.expression( 7115 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7116 ) 7117 7118 if self._curr: 7119 self._retreat(index) 7120 return self._parse_as_command(self._prev) 7121 7122 return set_ 7123 7124 def _parse_var_from_options( 7125 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7126 ) -> t.Optional[exp.Var]: 7127 start = self._curr 7128 if not start: 7129 return None 7130 7131 option = start.text.upper() 7132 continuations = options.get(option) 7133 7134 index = self._index 7135 self._advance() 7136 for keywords in continuations or []: 7137 if isinstance(keywords, str): 7138 keywords = (keywords,) 7139 7140 if self._match_text_seq(*keywords): 7141 option = f"{option} {' '.join(keywords)}" 7142 break 7143 else: 7144 if continuations or continuations is None: 7145 if raise_unmatched: 7146 self.raise_error(f"Unknown option {option}") 7147 7148 self._retreat(index) 7149 return None 7150 7151 return exp.var(option) 7152 7153 def _parse_as_command(self, start: Token) -> exp.Command: 7154 while self._curr: 7155 self._advance() 7156 text = self._find_sql(start, self._prev) 7157 size = len(start.text) 7158 self._warn_unsupported() 7159 return exp.Command(this=text[:size], expression=text[size:]) 7160 7161 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7162 settings = [] 7163 7164 self._match_l_paren() 7165 kind = self._parse_id_var() 7166 7167 if self._match(TokenType.L_PAREN): 7168 while True: 7169 key = self._parse_id_var() 7170 value = self._parse_primary() 7171 if not key and value is None: 7172 break 7173 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7174 self._match(TokenType.R_PAREN) 7175 7176 self._match_r_paren() 7177 7178 return self.expression( 7179 exp.DictProperty, 7180 this=this, 7181 kind=kind.this if kind else None, 7182 settings=settings, 7183 ) 7184 7185 def _parse_dict_range(self, this: str) -> exp.DictRange: 7186 self._match_l_paren() 7187 has_min = self._match_text_seq("MIN") 7188 if has_min: 7189 min = self._parse_var() or self._parse_primary() 7190 
self._match_text_seq("MAX") 7191 max = self._parse_var() or self._parse_primary() 7192 else: 7193 max = self._parse_var() or self._parse_primary() 7194 min = exp.Literal.number(0) 7195 self._match_r_paren() 7196 return self.expression(exp.DictRange, this=this, min=min, max=max) 7197 7198 def _parse_comprehension( 7199 self, this: t.Optional[exp.Expression] 7200 ) -> t.Optional[exp.Comprehension]: 7201 index = self._index 7202 expression = self._parse_column() 7203 if not self._match(TokenType.IN): 7204 self._retreat(index - 1) 7205 return None 7206 iterator = self._parse_column() 7207 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7208 return self.expression( 7209 exp.Comprehension, 7210 this=this, 7211 expression=expression, 7212 iterator=iterator, 7213 condition=condition, 7214 ) 7215 7216 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7217 if self._match(TokenType.HEREDOC_STRING): 7218 return self.expression(exp.Heredoc, this=self._prev.text) 7219 7220 if not self._match_text_seq("$"): 7221 return None 7222 7223 tags = ["$"] 7224 tag_text = None 7225 7226 if self._is_connected(): 7227 self._advance() 7228 tags.append(self._prev.text.upper()) 7229 else: 7230 self.raise_error("No closing $ found") 7231 7232 if tags[-1] != "$": 7233 if self._is_connected() and self._match_text_seq("$"): 7234 tag_text = tags[-1] 7235 tags.append("$") 7236 else: 7237 self.raise_error("No closing $ found") 7238 7239 heredoc_start = self._curr 7240 7241 while self._curr: 7242 if self._match_text_seq(*tags, advance=False): 7243 this = self._find_sql(heredoc_start, self._prev) 7244 self._advance(len(tags)) 7245 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7246 7247 self._advance() 7248 7249 self.raise_error(f"No closing {''.join(tags)} found") 7250 return None 7251 7252 def _find_parser( 7253 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7254 ) -> t.Optional[t.Callable]: 7255 if not self._curr: 7256 return None 7257 7258 index = self._index 7259 this = [] 7260 while True: 7261 # The current token might be multiple words 7262 curr = self._curr.text.upper() 7263 key = curr.split(" ") 7264 this.append(curr) 7265 7266 self._advance() 7267 result, trie = in_trie(trie, key) 7268 if result == TrieResult.FAILED: 7269 break 7270 7271 if result == TrieResult.EXISTS: 7272 subparser = parsers[" ".join(this)] 7273 return subparser 7274 7275 self._retreat(index) 7276 return None 7277 7278 def _match(self, token_type, advance=True, expression=None): 7279 if not self._curr: 7280 return None 7281 7282 if self._curr.token_type == token_type: 7283 if advance: 7284 self._advance() 7285 self._add_comments(expression) 7286 return True 7287 7288 return None 7289 7290 def _match_set(self, types, advance=True): 7291 if not self._curr: 7292 return None 7293 7294 if self._curr.token_type in types: 7295 if advance: 7296 self._advance() 7297 return True 7298 7299 return None 7300 7301 def _match_pair(self, token_type_a, token_type_b, advance=True): 7302 if not self._curr or not self._next: 7303 return None 7304 7305 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7306 if advance: 7307 self._advance(2) 7308 return True 7309 7310 return None 7311 7312 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7313 if not self._match(TokenType.L_PAREN, expression=expression): 7314 self.raise_error("Expecting (") 7315 7316 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7317 if not 
self._match(TokenType.R_PAREN, expression=expression): 7318 self.raise_error("Expecting )") 7319 7320 def _match_texts(self, texts, advance=True): 7321 if ( 7322 self._curr 7323 and self._curr.token_type != TokenType.STRING 7324 and self._curr.text.upper() in texts 7325 ): 7326 if advance: 7327 self._advance() 7328 return True 7329 return None 7330 7331 def _match_text_seq(self, *texts, advance=True): 7332 index = self._index 7333 for text in texts: 7334 if ( 7335 self._curr 7336 and self._curr.token_type != TokenType.STRING 7337 and self._curr.text.upper() == text 7338 ): 7339 self._advance() 7340 else: 7341 self._retreat(index) 7342 return None 7343 7344 if not advance: 7345 self._retreat(index) 7346 7347 return True 7348 7349 def _replace_lambda( 7350 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7351 ) -> t.Optional[exp.Expression]: 7352 if not node: 7353 return node 7354 7355 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7356 7357 for column in node.find_all(exp.Column): 7358 typ = lambda_types.get(column.parts[0].name) 7359 if typ is not None: 7360 dot_or_id = column.to_dot() if column.table else column.this 7361 7362 if typ: 7363 dot_or_id = self.expression( 7364 exp.Cast, 7365 this=dot_or_id, 7366 to=typ, 7367 ) 7368 7369 parent = column.parent 7370 7371 while isinstance(parent, exp.Dot): 7372 if not isinstance(parent.parent, exp.Dot): 7373 parent.replace(dot_or_id) 7374 break 7375 parent = parent.parent 7376 else: 7377 if column is node: 7378 node = dot_or_id 7379 else: 7380 column.replace(dot_or_id) 7381 return node 7382 7383 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7384 start = self._prev 7385 7386 # Not to be confused with TRUNCATE(number, decimals) function call 7387 if self._match(TokenType.L_PAREN): 7388 self._retreat(self._index - 2) 7389 return self._parse_function() 7390 7391 # Clickhouse supports TRUNCATE DATABASE as well 7392 is_database = self._match(TokenType.DATABASE) 7393 7394 self._match(TokenType.TABLE) 7395 7396 exists = self._parse_exists(not_=False) 7397 7398 expressions = self._parse_csv( 7399 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7400 ) 7401 7402 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7403 7404 if self._match_text_seq("RESTART", "IDENTITY"): 7405 identity = "RESTART" 7406 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7407 identity = "CONTINUE" 7408 else: 7409 identity = None 7410 7411 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7412 option = self._prev.text 7413 else: 7414 option = None 7415 7416 partition = self._parse_partition() 7417 7418 # Fallback case 7419 if self._curr: 7420 return self._parse_as_command(start) 7421 7422 return self.expression( 7423 exp.TruncateTable, 7424 expressions=expressions, 7425 is_database=is_database, 7426 exists=exists, 7427 cluster=cluster, 7428 identity=identity, 7429 option=option, 7430 partition=partition, 7431 ) 7432 7433 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7434 this = self._parse_ordered(self._parse_opclass) 7435 7436 if not self._match(TokenType.WITH): 7437 return this 7438 7439 op = self._parse_var(any_token=True) 7440 7441 return self.expression(exp.WithOperator, this=this, op=op) 7442 7443 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7444 self._match(TokenType.EQ) 7445 self._match(TokenType.L_PAREN) 7446 7447 opts: t.List[t.Optional[exp.Expression]] = [] 7448 while 
self._curr and not self._match(TokenType.R_PAREN): 7449 if self._match_text_seq("FORMAT_NAME", "="): 7450 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7451 # so we parse it separately to use _parse_field() 7452 prop = self.expression( 7453 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7454 ) 7455 opts.append(prop) 7456 else: 7457 opts.append(self._parse_property()) 7458 7459 self._match(TokenType.COMMA) 7460 7461 return opts 7462 7463 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7464 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7465 7466 options = [] 7467 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7468 option = self._parse_var(any_token=True) 7469 prev = self._prev.text.upper() 7470 7471 # Different dialects might separate options and values by white space, "=" and "AS" 7472 self._match(TokenType.EQ) 7473 self._match(TokenType.ALIAS) 7474 7475 param = self.expression(exp.CopyParameter, this=option) 7476 7477 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7478 TokenType.L_PAREN, advance=False 7479 ): 7480 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7481 param.set("expressions", self._parse_wrapped_options()) 7482 elif prev == "FILE_FORMAT": 7483 # T-SQL's external file format case 7484 param.set("expression", self._parse_field()) 7485 else: 7486 param.set("expression", self._parse_unquoted_field()) 7487 7488 options.append(param) 7489 self._match(sep) 7490 7491 return options 7492 7493 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7494 expr = self.expression(exp.Credentials) 7495 7496 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7497 expr.set("storage", self._parse_field()) 7498 if self._match_text_seq("CREDENTIALS"): 7499 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7500 creds = ( 7501 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7502 ) 7503 expr.set("credentials", creds) 7504 if self._match_text_seq("ENCRYPTION"): 7505 expr.set("encryption", self._parse_wrapped_options()) 7506 if self._match_text_seq("IAM_ROLE"): 7507 expr.set("iam_role", self._parse_field()) 7508 if self._match_text_seq("REGION"): 7509 expr.set("region", self._parse_field()) 7510 7511 return expr 7512 7513 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7514 return self._parse_field() 7515 7516 def _parse_copy(self) -> exp.Copy | exp.Command: 7517 start = self._prev 7518 7519 self._match(TokenType.INTO) 7520 7521 this = ( 7522 self._parse_select(nested=True, parse_subquery_alias=False) 7523 if self._match(TokenType.L_PAREN, advance=False) 7524 else self._parse_table(schema=True) 7525 ) 7526 7527 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7528 7529 files = self._parse_csv(self._parse_file_location) 7530 credentials = self._parse_credentials() 7531 7532 self._match_text_seq("WITH") 7533 7534 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7535 7536 # Fallback case 7537 if self._curr: 7538 return self._parse_as_command(start) 7539 7540 return self.expression( 7541 exp.Copy, 7542 this=this, 7543 kind=kind, 7544 credentials=credentials, 7545 files=files, 7546 params=params, 7547 ) 7548 7549 def _parse_normalize(self) -> exp.Normalize: 7550 return self.expression( 7551 exp.Normalize, 7552 this=self._parse_bitwise(), 7553 form=self._match(TokenType.COMMA) and self._parse_var(), 7554 ) 7555 7556 def _parse_star_ops(self) -> 
t.Optional[exp.Expression]: 7557 if self._match_text_seq("COLUMNS", "(", advance=False): 7558 this = self._parse_function() 7559 if isinstance(this, exp.Columns): 7560 this.set("unpack", True) 7561 return this 7562 7563 return self.expression( 7564 exp.Star, 7565 **{ # type: ignore 7566 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7567 "replace": self._parse_star_op("REPLACE"), 7568 "rename": self._parse_star_op("RENAME"), 7569 }, 7570 ) 7571 7572 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7573 privilege_parts = [] 7574 7575 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7576 # (end of privilege list) or L_PAREN (start of column list) are met 7577 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7578 privilege_parts.append(self._curr.text.upper()) 7579 self._advance() 7580 7581 this = exp.var(" ".join(privilege_parts)) 7582 expressions = ( 7583 self._parse_wrapped_csv(self._parse_column) 7584 if self._match(TokenType.L_PAREN, advance=False) 7585 else None 7586 ) 7587 7588 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7589 7590 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7591 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7592 principal = self._parse_id_var() 7593 7594 if not principal: 7595 return None 7596 7597 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7598 7599 def _parse_grant(self) -> exp.Grant | exp.Command: 7600 start = self._prev 7601 7602 privileges = self._parse_csv(self._parse_grant_privilege) 7603 7604 self._match(TokenType.ON) 7605 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7606 7607 # Attempt to parse the securable e.g. MySQL allows names 7608 # such as "foo.*", "*.*" which are not easily parseable yet 7609 securable = self._try_parse(self._parse_table_parts) 7610 7611 if not securable or not self._match_text_seq("TO"): 7612 return self._parse_as_command(start) 7613 7614 principals = self._parse_csv(self._parse_grant_principal) 7615 7616 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7617 7618 if self._curr: 7619 return self._parse_as_command(start) 7620 7621 return self.expression( 7622 exp.Grant, 7623 privileges=privileges, 7624 kind=kind, 7625 securable=securable, 7626 principals=principals, 7627 grant_option=grant_option, 7628 ) 7629 7630 def _parse_overlay(self) -> exp.Overlay: 7631 return self.expression( 7632 exp.Overlay, 7633 **{ # type: ignore 7634 "this": self._parse_bitwise(), 7635 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7636 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7637 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7638 }, 7639 )
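A minimal end-to-end sketch of how the statement parsers above are reached, assuming only the Tokenizer and Parser classes from this module (the ALTER TABLE input is invented for illustration):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "ALTER TABLE t ADD COLUMN c INT"
tokens = Tokenizer().tokenize(sql)

# Parser.parse returns one expression tree per statement; an ALTER token is
# dispatched through STATEMENT_PARSERS to _parse_alter, defined above.
(statement,) = Parser().parse(tokens, sql)
print(statement.sql())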
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
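For illustration, a hedged sketch of calling this builder directly; the literal arguments are invented for the example:

from sqlglot import exp
from sqlglot.parser import build_var_map

# Alternating key/value arguments, as produced for a VAR_MAP('a', 1, 'b', 2) call
node = build_var_map(
    [exp.Literal.string("a"), exp.Literal.number(1),
     exp.Literal.string("b"), exp.Literal.number(2)]
)
assert isinstance(node, exp.VarMap)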
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range
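This factory is how the RANGE_PARSERS entries further down are built; a dialect-specific parser would typically extend that table the same way. A sketch mirroring the LT_AT mapping shown later in this module:

from sqlglot import exp
from sqlglot.parser import Parser, binary_range_parser
from sqlglot.tokens import TokenType

class MyParser(Parser):  # hypothetical subclass, for illustration only
    RANGE_PARSERS = {
        **Parser.RANGE_PARSERS,
        # a <@ b parses with reversed operands, hence reverse_args=True
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
    }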
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
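A small sketch of the two-argument path; Dialect() is used here as a neutral stand-in, assuming its default leaves LOG_BASE_FIRST truthy so the base stays in the node's this slot:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_logarithm

# LOG(10, 100) -> exp.Log(this=10, expression=100) under a base-first dialect
log = build_logarithm([exp.Literal.number(10), exp.Literal.number(100)], Dialect())
assert isinstance(log, exp.Log)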
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
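The FUNCTIONS table below registers this factory for JSON_EXTRACT and its variants; the effect, sketched through the public API (exact node shape may vary by version):

from sqlglot import exp, parse_one

# The '$.a' argument is normalized by dialect.to_json_path before being stored
expr = parse_one("SELECT JSON_EXTRACT(x, '$.a')")
assert expr.find(exp.JSONExtract) is not None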
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
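The parenthesization matters once exp.Mod is re-rendered as the % operator; a quick sketch of the expected round trip:

from sqlglot import parse_one

# MOD(a + 1, 7) should render as (a + 1) % 7 rather than a + 1 % 7
print(parse_one("SELECT MOD(a + 1, 7)").sql())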
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
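Only dialects with distinct ARRAY[...] and ARRAY(...) constructors record the bracket flag; a direct-call sketch with Dialect() as a neutral stand-in:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

arr = build_array_constructor(exp.Array, [exp.Literal.number(1)], TokenType.L_BRACKET, Dialect())
# bracket_notation is only set when dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS is true
assert isinstance(arr, exp.Array)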
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
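Dialects that accept the two-argument CONVERT_TIMEZONE pass a default source zone; a sketch assuming an implied 'UTC' source:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

# CONVERT_TIMEZONE('America/New_York', ts) with the source zone defaulted
node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",
)
assert isinstance(node, exp.ConvertTimezone)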
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME2, 337 TokenType.DATETIME64, 338 TokenType.SMALLDATETIME, 339 TokenType.DATE, 340 TokenType.DATE32, 341 TokenType.INT4RANGE, 342 TokenType.INT4MULTIRANGE, 343 TokenType.INT8RANGE, 344 TokenType.INT8MULTIRANGE, 345 TokenType.NUMRANGE, 346 TokenType.NUMMULTIRANGE, 347 TokenType.TSRANGE, 348 TokenType.TSMULTIRANGE, 349 TokenType.TSTZRANGE, 350 TokenType.TSTZMULTIRANGE, 351 TokenType.DATERANGE, 352 TokenType.DATEMULTIRANGE, 353 TokenType.DECIMAL, 354 TokenType.DECIMAL32, 355 TokenType.DECIMAL64, 356 TokenType.DECIMAL128, 357 TokenType.DECIMAL256, 358 TokenType.UDECIMAL, 359 TokenType.BIGDECIMAL, 360 TokenType.UUID, 361 TokenType.GEOGRAPHY, 362 TokenType.GEOMETRY, 363 TokenType.POINT, 364 TokenType.RING, 365 TokenType.LINESTRING, 366 TokenType.MULTILINESTRING, 367 TokenType.POLYGON, 368 TokenType.MULTIPOLYGON, 369 TokenType.HLLSKETCH, 370 TokenType.HSTORE, 371 TokenType.PSEUDO_TYPE, 372 TokenType.SUPER, 373 TokenType.SERIAL, 374 TokenType.SMALLSERIAL, 375 TokenType.BIGSERIAL, 376 TokenType.XML, 377 TokenType.YEAR, 378 TokenType.UNIQUEIDENTIFIER, 379 TokenType.USERDEFINED, 380 TokenType.MONEY, 381 TokenType.SMALLMONEY, 382 TokenType.ROWVERSION, 383 TokenType.IMAGE, 384 TokenType.VARIANT, 385 TokenType.VECTOR, 386 TokenType.OBJECT, 387 TokenType.OBJECT_IDENTIFIER, 388 TokenType.INET, 389 TokenType.IPADDRESS, 390 TokenType.IPPREFIX, 
391 TokenType.IPV4, 392 TokenType.IPV6, 393 TokenType.UNKNOWN, 394 TokenType.NULL, 395 TokenType.NAME, 396 TokenType.TDIGEST, 397 *ENUM_TYPE_TOKENS, 398 *NESTED_TYPE_TOKENS, 399 *AGGREGATE_TYPE_TOKENS, 400 } 401 402 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 403 TokenType.BIGINT: TokenType.UBIGINT, 404 TokenType.INT: TokenType.UINT, 405 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 406 TokenType.SMALLINT: TokenType.USMALLINT, 407 TokenType.TINYINT: TokenType.UTINYINT, 408 TokenType.DECIMAL: TokenType.UDECIMAL, 409 } 410 411 SUBQUERY_PREDICATES = { 412 TokenType.ANY: exp.Any, 413 TokenType.ALL: exp.All, 414 TokenType.EXISTS: exp.Exists, 415 TokenType.SOME: exp.Any, 416 } 417 418 RESERVED_TOKENS = { 419 *Tokenizer.SINGLE_TOKENS.values(), 420 TokenType.SELECT, 421 } - {TokenType.IDENTIFIER} 422 423 DB_CREATABLES = { 424 TokenType.DATABASE, 425 TokenType.DICTIONARY, 426 TokenType.MODEL, 427 TokenType.SCHEMA, 428 TokenType.SEQUENCE, 429 TokenType.STORAGE_INTEGRATION, 430 TokenType.TABLE, 431 TokenType.TAG, 432 TokenType.VIEW, 433 TokenType.WAREHOUSE, 434 TokenType.STREAMLIT, 435 TokenType.SINK, 436 TokenType.SOURCE, 437 } 438 439 CREATABLES = { 440 TokenType.COLUMN, 441 TokenType.CONSTRAINT, 442 TokenType.FOREIGN_KEY, 443 TokenType.FUNCTION, 444 TokenType.INDEX, 445 TokenType.PROCEDURE, 446 *DB_CREATABLES, 447 } 448 449 ALTERABLES = { 450 TokenType.INDEX, 451 TokenType.TABLE, 452 TokenType.VIEW, 453 } 454 455 # Tokens that can represent identifiers 456 ID_VAR_TOKENS = { 457 TokenType.ALL, 458 TokenType.ATTACH, 459 TokenType.VAR, 460 TokenType.ANTI, 461 TokenType.APPLY, 462 TokenType.ASC, 463 TokenType.ASOF, 464 TokenType.AUTO_INCREMENT, 465 TokenType.BEGIN, 466 TokenType.BPCHAR, 467 TokenType.CACHE, 468 TokenType.CASE, 469 TokenType.COLLATE, 470 TokenType.COMMAND, 471 TokenType.COMMENT, 472 TokenType.COMMIT, 473 TokenType.CONSTRAINT, 474 TokenType.COPY, 475 TokenType.CUBE, 476 TokenType.DEFAULT, 477 TokenType.DELETE, 478 TokenType.DESC, 479 TokenType.DESCRIBE, 480 TokenType.DETACH, 481 TokenType.DICTIONARY, 482 TokenType.DIV, 483 TokenType.END, 484 TokenType.EXECUTE, 485 TokenType.ESCAPE, 486 TokenType.FALSE, 487 TokenType.FIRST, 488 TokenType.FILTER, 489 TokenType.FINAL, 490 TokenType.FORMAT, 491 TokenType.FULL, 492 TokenType.IDENTIFIER, 493 TokenType.IS, 494 TokenType.ISNULL, 495 TokenType.INTERVAL, 496 TokenType.KEEP, 497 TokenType.KILL, 498 TokenType.LEFT, 499 TokenType.LOAD, 500 TokenType.MERGE, 501 TokenType.NATURAL, 502 TokenType.NEXT, 503 TokenType.OFFSET, 504 TokenType.OPERATOR, 505 TokenType.ORDINALITY, 506 TokenType.OVERLAPS, 507 TokenType.OVERWRITE, 508 TokenType.PARTITION, 509 TokenType.PERCENT, 510 TokenType.PIVOT, 511 TokenType.PRAGMA, 512 TokenType.RANGE, 513 TokenType.RECURSIVE, 514 TokenType.REFERENCES, 515 TokenType.REFRESH, 516 TokenType.RENAME, 517 TokenType.REPLACE, 518 TokenType.RIGHT, 519 TokenType.ROLLUP, 520 TokenType.ROW, 521 TokenType.ROWS, 522 TokenType.SEMI, 523 TokenType.SET, 524 TokenType.SETTINGS, 525 TokenType.SHOW, 526 TokenType.TEMPORARY, 527 TokenType.TOP, 528 TokenType.TRUE, 529 TokenType.TRUNCATE, 530 TokenType.UNIQUE, 531 TokenType.UNNEST, 532 TokenType.UNPIVOT, 533 TokenType.UPDATE, 534 TokenType.USE, 535 TokenType.VOLATILE, 536 TokenType.WINDOW, 537 *CREATABLES, 538 *SUBQUERY_PREDICATES, 539 *TYPE_TOKENS, 540 *NO_PAREN_FUNCTIONS, 541 } 542 ID_VAR_TOKENS.remove(TokenType.UNION) 543 544 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 545 546 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 547 TokenType.ANTI, 548 TokenType.APPLY, 549 TokenType.ASOF, 550 TokenType.FULL, 551 
TokenType.LEFT, 552 TokenType.LOCK, 553 TokenType.NATURAL, 554 TokenType.OFFSET, 555 TokenType.RIGHT, 556 TokenType.SEMI, 557 TokenType.WINDOW, 558 } 559 560 ALIAS_TOKENS = ID_VAR_TOKENS 561 562 ARRAY_CONSTRUCTORS = { 563 "ARRAY": exp.Array, 564 "LIST": exp.List, 565 } 566 567 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 568 569 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 570 571 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 572 573 FUNC_TOKENS = { 574 TokenType.COLLATE, 575 TokenType.COMMAND, 576 TokenType.CURRENT_DATE, 577 TokenType.CURRENT_DATETIME, 578 TokenType.CURRENT_TIMESTAMP, 579 TokenType.CURRENT_TIME, 580 TokenType.CURRENT_USER, 581 TokenType.FILTER, 582 TokenType.FIRST, 583 TokenType.FORMAT, 584 TokenType.GLOB, 585 TokenType.IDENTIFIER, 586 TokenType.INDEX, 587 TokenType.ISNULL, 588 TokenType.ILIKE, 589 TokenType.INSERT, 590 TokenType.LIKE, 591 TokenType.MERGE, 592 TokenType.NEXT, 593 TokenType.OFFSET, 594 TokenType.PRIMARY_KEY, 595 TokenType.RANGE, 596 TokenType.REPLACE, 597 TokenType.RLIKE, 598 TokenType.ROW, 599 TokenType.UNNEST, 600 TokenType.VAR, 601 TokenType.LEFT, 602 TokenType.RIGHT, 603 TokenType.SEQUENCE, 604 TokenType.DATE, 605 TokenType.DATETIME, 606 TokenType.TABLE, 607 TokenType.TIMESTAMP, 608 TokenType.TIMESTAMPTZ, 609 TokenType.TRUNCATE, 610 TokenType.WINDOW, 611 TokenType.XOR, 612 *TYPE_TOKENS, 613 *SUBQUERY_PREDICATES, 614 } 615 616 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 617 TokenType.AND: exp.And, 618 } 619 620 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 621 TokenType.COLON_EQ: exp.PropertyEQ, 622 } 623 624 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 625 TokenType.OR: exp.Or, 626 } 627 628 EQUALITY = { 629 TokenType.EQ: exp.EQ, 630 TokenType.NEQ: exp.NEQ, 631 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 632 } 633 634 COMPARISON = { 635 TokenType.GT: exp.GT, 636 TokenType.GTE: exp.GTE, 637 TokenType.LT: exp.LT, 638 TokenType.LTE: exp.LTE, 639 } 640 641 BITWISE = { 642 TokenType.AMP: exp.BitwiseAnd, 643 TokenType.CARET: exp.BitwiseXor, 644 TokenType.PIPE: exp.BitwiseOr, 645 } 646 647 TERM = { 648 TokenType.DASH: exp.Sub, 649 TokenType.PLUS: exp.Add, 650 TokenType.MOD: exp.Mod, 651 TokenType.COLLATE: exp.Collate, 652 } 653 654 FACTOR = { 655 TokenType.DIV: exp.IntDiv, 656 TokenType.LR_ARROW: exp.Distance, 657 TokenType.SLASH: exp.Div, 658 TokenType.STAR: exp.Mul, 659 } 660 661 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 662 663 TIMES = { 664 TokenType.TIME, 665 TokenType.TIMETZ, 666 } 667 668 TIMESTAMPS = { 669 TokenType.TIMESTAMP, 670 TokenType.TIMESTAMPTZ, 671 TokenType.TIMESTAMPLTZ, 672 *TIMES, 673 } 674 675 SET_OPERATIONS = { 676 TokenType.UNION, 677 TokenType.INTERSECT, 678 TokenType.EXCEPT, 679 } 680 681 JOIN_METHODS = { 682 TokenType.ASOF, 683 TokenType.NATURAL, 684 TokenType.POSITIONAL, 685 } 686 687 JOIN_SIDES = { 688 TokenType.LEFT, 689 TokenType.RIGHT, 690 TokenType.FULL, 691 } 692 693 JOIN_KINDS = { 694 TokenType.ANTI, 695 TokenType.CROSS, 696 TokenType.INNER, 697 TokenType.OUTER, 698 TokenType.SEMI, 699 TokenType.STRAIGHT_JOIN, 700 } 701 702 JOIN_HINTS: t.Set[str] = set() 703 704 LAMBDAS = { 705 TokenType.ARROW: lambda self, expressions: self.expression( 706 exp.Lambda, 707 this=self._replace_lambda( 708 self._parse_assignment(), 709 expressions, 710 ), 711 expressions=expressions, 712 ), 713 TokenType.FARROW: lambda self, expressions: self.expression( 714 exp.Kwarg, 715 this=exp.var(expressions[0].name), 716 expression=self._parse_assignment(), 717 ), 718 } 
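    # A hedged illustration of the LAMBDAS table above: the ARROW entry turns
    # `x -> x + 1` into an exp.Lambda, while FARROW turns `name => value` into an
    # exp.Kwarg. Assuming the default dialect routes function arguments through
    # _parse_lambda, something like the following surfaces the Lambda node:
    #
    #     from sqlglot import exp, parse_one
    #     expr = parse_one("SELECT some_func(arr, x -> x + 1)")
    #     lamb = expr.find(exp.Lambda)  # the parsed `x -> x + 1`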
719 720 COLUMN_OPERATORS = { 721 TokenType.DOT: None, 722 TokenType.DCOLON: lambda self, this, to: self.expression( 723 exp.Cast if self.STRICT_CAST else exp.TryCast, 724 this=this, 725 to=to, 726 ), 727 TokenType.ARROW: lambda self, this, path: self.expression( 728 exp.JSONExtract, 729 this=this, 730 expression=self.dialect.to_json_path(path), 731 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 732 ), 733 TokenType.DARROW: lambda self, this, path: self.expression( 734 exp.JSONExtractScalar, 735 this=this, 736 expression=self.dialect.to_json_path(path), 737 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 738 ), 739 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 740 exp.JSONBExtract, 741 this=this, 742 expression=path, 743 ), 744 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 745 exp.JSONBExtractScalar, 746 this=this, 747 expression=path, 748 ), 749 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 750 exp.JSONBContains, 751 this=this, 752 expression=key, 753 ), 754 } 755 756 EXPRESSION_PARSERS = { 757 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 758 exp.Column: lambda self: self._parse_column(), 759 exp.Condition: lambda self: self._parse_assignment(), 760 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 761 exp.Expression: lambda self: self._parse_expression(), 762 exp.From: lambda self: self._parse_from(joins=True), 763 exp.Group: lambda self: self._parse_group(), 764 exp.Having: lambda self: self._parse_having(), 765 exp.Hint: lambda self: self._parse_hint_body(), 766 exp.Identifier: lambda self: self._parse_id_var(), 767 exp.Join: lambda self: self._parse_join(), 768 exp.Lambda: lambda self: self._parse_lambda(), 769 exp.Lateral: lambda self: self._parse_lateral(), 770 exp.Limit: lambda self: self._parse_limit(), 771 exp.Offset: lambda self: self._parse_offset(), 772 exp.Order: lambda self: self._parse_order(), 773 exp.Ordered: lambda self: self._parse_ordered(), 774 exp.Properties: lambda self: self._parse_properties(), 775 exp.Qualify: lambda self: self._parse_qualify(), 776 exp.Returning: lambda self: self._parse_returning(), 777 exp.Select: lambda self: self._parse_select(), 778 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 779 exp.Table: lambda self: self._parse_table_parts(), 780 exp.TableAlias: lambda self: self._parse_table_alias(), 781 exp.Tuple: lambda self: self._parse_value(), 782 exp.Whens: lambda self: self._parse_when_matched(), 783 exp.Where: lambda self: self._parse_where(), 784 exp.Window: lambda self: self._parse_named_window(), 785 exp.With: lambda self: self._parse_with(), 786 "JOIN_TYPE": lambda self: self._parse_join_parts(), 787 } 788 789 STATEMENT_PARSERS = { 790 TokenType.ALTER: lambda self: self._parse_alter(), 791 TokenType.BEGIN: lambda self: self._parse_transaction(), 792 TokenType.CACHE: lambda self: self._parse_cache(), 793 TokenType.COMMENT: lambda self: self._parse_comment(), 794 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 795 TokenType.COPY: lambda self: self._parse_copy(), 796 TokenType.CREATE: lambda self: self._parse_create(), 797 TokenType.DELETE: lambda self: self._parse_delete(), 798 TokenType.DESC: lambda self: self._parse_describe(), 799 TokenType.DESCRIBE: lambda self: self._parse_describe(), 800 TokenType.DROP: lambda self: self._parse_drop(), 801 TokenType.GRANT: lambda self: self._parse_grant(), 802 TokenType.INSERT: lambda self: self._parse_insert(), 803 TokenType.KILL: 
lambda self: self._parse_kill(), 804 TokenType.LOAD: lambda self: self._parse_load(), 805 TokenType.MERGE: lambda self: self._parse_merge(), 806 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 807 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 808 TokenType.REFRESH: lambda self: self._parse_refresh(), 809 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 810 TokenType.SET: lambda self: self._parse_set(), 811 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 812 TokenType.UNCACHE: lambda self: self._parse_uncache(), 813 TokenType.UPDATE: lambda self: self._parse_update(), 814 TokenType.USE: lambda self: self.expression( 815 exp.Use, 816 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 817 this=self._parse_table(schema=False), 818 ), 819 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 820 } 821 822 UNARY_PARSERS = { 823 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 824 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 825 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 826 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 827 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 828 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 829 } 830 831 STRING_PARSERS = { 832 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 833 exp.RawString, this=token.text 834 ), 835 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 836 exp.National, this=token.text 837 ), 838 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 839 TokenType.STRING: lambda self, token: self.expression( 840 exp.Literal, this=token.text, is_string=True 841 ), 842 TokenType.UNICODE_STRING: lambda self, token: self.expression( 843 exp.UnicodeString, 844 this=token.text, 845 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 846 ), 847 } 848 849 NUMERIC_PARSERS = { 850 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 851 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 852 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 853 TokenType.NUMBER: lambda self, token: self.expression( 854 exp.Literal, this=token.text, is_string=False 855 ), 856 } 857 858 PRIMARY_PARSERS = { 859 **STRING_PARSERS, 860 **NUMERIC_PARSERS, 861 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 862 TokenType.NULL: lambda self, _: self.expression(exp.Null), 863 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 864 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 865 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 866 TokenType.STAR: lambda self, _: self._parse_star_ops(), 867 } 868 869 PLACEHOLDER_PARSERS = { 870 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 871 TokenType.PARAMETER: lambda self: self._parse_parameter(), 872 TokenType.COLON: lambda self: ( 873 self.expression(exp.Placeholder, this=self._prev.text) 874 if self._match_set(self.ID_VAR_TOKENS) 875 else None 876 ), 877 } 878 879 RANGE_PARSERS = { 880 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 881 TokenType.BETWEEN: lambda self, this: 
self._parse_between(this), 882 TokenType.GLOB: binary_range_parser(exp.Glob), 883 TokenType.ILIKE: binary_range_parser(exp.ILike), 884 TokenType.IN: lambda self, this: self._parse_in(this), 885 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 886 TokenType.IS: lambda self, this: self._parse_is(this), 887 TokenType.LIKE: binary_range_parser(exp.Like), 888 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 889 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 890 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 891 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 892 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 893 } 894 895 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 896 "ALLOWED_VALUES": lambda self: self.expression( 897 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 898 ), 899 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 900 "AUTO": lambda self: self._parse_auto_property(), 901 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 902 "BACKUP": lambda self: self.expression( 903 exp.BackupProperty, this=self._parse_var(any_token=True) 904 ), 905 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 906 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 907 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 908 "CHECKSUM": lambda self: self._parse_checksum(), 909 "CLUSTER BY": lambda self: self._parse_cluster(), 910 "CLUSTERED": lambda self: self._parse_clustered_by(), 911 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 912 exp.CollateProperty, **kwargs 913 ), 914 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 915 "CONTAINS": lambda self: self._parse_contains_property(), 916 "COPY": lambda self: self._parse_copy_property(), 917 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 918 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 919 "DEFINER": lambda self: self._parse_definer(), 920 "DETERMINISTIC": lambda self: self.expression( 921 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 922 ), 923 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 924 "DUPLICATE": lambda self: self._parse_duplicate(), 925 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 926 "DISTKEY": lambda self: self._parse_distkey(), 927 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 928 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 929 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 930 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 931 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 932 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 933 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 934 "FREESPACE": lambda self: self._parse_freespace(), 935 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 936 "HEAP": lambda self: self.expression(exp.HeapProperty), 937 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 938 "IMMUTABLE": lambda self: self.expression( 939 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 940 ), 941 "INHERITS": lambda self: self.expression( 942 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 943 ), 944 
"INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 945 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 946 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 947 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 948 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 949 "LIKE": lambda self: self._parse_create_like(), 950 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 951 "LOCK": lambda self: self._parse_locking(), 952 "LOCKING": lambda self: self._parse_locking(), 953 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 954 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 955 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 956 "MODIFIES": lambda self: self._parse_modifies_property(), 957 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 958 "NO": lambda self: self._parse_no_property(), 959 "ON": lambda self: self._parse_on_property(), 960 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 961 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 962 "PARTITION": lambda self: self._parse_partitioned_of(), 963 "PARTITION BY": lambda self: self._parse_partitioned_by(), 964 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 965 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 966 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 967 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 968 "READS": lambda self: self._parse_reads_property(), 969 "REMOTE": lambda self: self._parse_remote_with_connection(), 970 "RETURNS": lambda self: self._parse_returns(), 971 "STRICT": lambda self: self.expression(exp.StrictProperty), 972 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 973 "ROW": lambda self: self._parse_row(), 974 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 975 "SAMPLE": lambda self: self.expression( 976 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 977 ), 978 "SECURE": lambda self: self.expression(exp.SecureProperty), 979 "SECURITY": lambda self: self._parse_security(), 980 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 981 "SETTINGS": lambda self: self._parse_settings_property(), 982 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 983 "SORTKEY": lambda self: self._parse_sortkey(), 984 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 985 "STABLE": lambda self: self.expression( 986 exp.StabilityProperty, this=exp.Literal.string("STABLE") 987 ), 988 "STORED": lambda self: self._parse_stored(), 989 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 990 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 991 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 992 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 993 "TO": lambda self: self._parse_to_table(), 994 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 995 "TRANSFORM": lambda self: self.expression( 996 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 997 ), 998 "TTL": lambda self: self._parse_ttl(), 999 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1000 "UNLOGGED": lambda self: 
self.expression(exp.UnloggedProperty), 1001 "VOLATILE": lambda self: self._parse_volatile_property(), 1002 "WITH": lambda self: self._parse_with_property(), 1003 } 1004 1005 CONSTRAINT_PARSERS = { 1006 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1007 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1008 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1009 "CHARACTER SET": lambda self: self.expression( 1010 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1011 ), 1012 "CHECK": lambda self: self.expression( 1013 exp.CheckColumnConstraint, 1014 this=self._parse_wrapped(self._parse_assignment), 1015 enforced=self._match_text_seq("ENFORCED"), 1016 ), 1017 "COLLATE": lambda self: self.expression( 1018 exp.CollateColumnConstraint, 1019 this=self._parse_identifier() or self._parse_column(), 1020 ), 1021 "COMMENT": lambda self: self.expression( 1022 exp.CommentColumnConstraint, this=self._parse_string() 1023 ), 1024 "COMPRESS": lambda self: self._parse_compress(), 1025 "CLUSTERED": lambda self: self.expression( 1026 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1027 ), 1028 "NONCLUSTERED": lambda self: self.expression( 1029 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1030 ), 1031 "DEFAULT": lambda self: self.expression( 1032 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1033 ), 1034 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1035 "EPHEMERAL": lambda self: self.expression( 1036 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1037 ), 1038 "EXCLUDE": lambda self: self.expression( 1039 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1040 ), 1041 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1042 "FORMAT": lambda self: self.expression( 1043 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1044 ), 1045 "GENERATED": lambda self: self._parse_generated_as_identity(), 1046 "IDENTITY": lambda self: self._parse_auto_increment(), 1047 "INLINE": lambda self: self._parse_inline(), 1048 "LIKE": lambda self: self._parse_create_like(), 1049 "NOT": lambda self: self._parse_not_constraint(), 1050 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1051 "ON": lambda self: ( 1052 self._match(TokenType.UPDATE) 1053 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1054 ) 1055 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1056 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1057 "PERIOD": lambda self: self._parse_period_for_system_time(), 1058 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1059 "REFERENCES": lambda self: self._parse_references(match=False), 1060 "TITLE": lambda self: self.expression( 1061 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1062 ), 1063 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1064 "UNIQUE": lambda self: self._parse_unique(), 1065 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1066 "WATERMARK": lambda self: self.expression( 1067 exp.WatermarkColumnConstraint, 1068 this=self._match(TokenType.FOR) and self._parse_column(), 1069 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1070 ), 1071 "WITH": lambda self: self.expression( 1072 exp.Properties, expressions=self._parse_wrapped_properties() 
1073 ), 1074 } 1075 1076 ALTER_PARSERS = { 1077 "ADD": lambda self: self._parse_alter_table_add(), 1078 "AS": lambda self: self._parse_select(), 1079 "ALTER": lambda self: self._parse_alter_table_alter(), 1080 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1081 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1082 "DROP": lambda self: self._parse_alter_table_drop(), 1083 "RENAME": lambda self: self._parse_alter_table_rename(), 1084 "SET": lambda self: self._parse_alter_table_set(), 1085 "SWAP": lambda self: self.expression( 1086 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1087 ), 1088 } 1089 1090 ALTER_ALTER_PARSERS = { 1091 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1092 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1093 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1094 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1095 } 1096 1097 SCHEMA_UNNAMED_CONSTRAINTS = { 1098 "CHECK", 1099 "EXCLUDE", 1100 "FOREIGN KEY", 1101 "LIKE", 1102 "PERIOD", 1103 "PRIMARY KEY", 1104 "UNIQUE", 1105 "WATERMARK", 1106 } 1107 1108 NO_PAREN_FUNCTION_PARSERS = { 1109 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1110 "CASE": lambda self: self._parse_case(), 1111 "CONNECT_BY_ROOT": lambda self: self.expression( 1112 exp.ConnectByRoot, this=self._parse_column() 1113 ), 1114 "IF": lambda self: self._parse_if(), 1115 } 1116 1117 INVALID_FUNC_NAME_TOKENS = { 1118 TokenType.IDENTIFIER, 1119 TokenType.STRING, 1120 } 1121 1122 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1123 1124 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1125 1126 FUNCTION_PARSERS = { 1127 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1128 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1129 "DECODE": lambda self: self._parse_decode(), 1130 "EXTRACT": lambda self: self._parse_extract(), 1131 "GAP_FILL": lambda self: self._parse_gap_fill(), 1132 "JSON_OBJECT": lambda self: self._parse_json_object(), 1133 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1134 "JSON_TABLE": lambda self: self._parse_json_table(), 1135 "MATCH": lambda self: self._parse_match_against(), 1136 "NORMALIZE": lambda self: self._parse_normalize(), 1137 "OPENJSON": lambda self: self._parse_open_json(), 1138 "OVERLAY": lambda self: self._parse_overlay(), 1139 "POSITION": lambda self: self._parse_position(), 1140 "PREDICT": lambda self: self._parse_predict(), 1141 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1142 "STRING_AGG": lambda self: self._parse_string_agg(), 1143 "SUBSTRING": lambda self: self._parse_substring(), 1144 "TRIM": lambda self: self._parse_trim(), 1145 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1146 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1147 } 1148 1149 QUERY_MODIFIER_PARSERS = { 1150 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1151 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1152 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1153 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1154 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1155 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1156 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1157 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1158 
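# Each entry here yields an (arg_name, parsed_expression) pair that _parse_query_modifiers
# sets on the enclosing query node; note below that both LIMIT and FETCH normalize to the
# "limit" arg, so either keyword produces the same modifier.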
TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1159 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1160 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1161 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1162 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1163 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1164 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1165 TokenType.CLUSTER_BY: lambda self: ( 1166 "cluster", 1167 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1168 ), 1169 TokenType.DISTRIBUTE_BY: lambda self: ( 1170 "distribute", 1171 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1172 ), 1173 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1174 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1175 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1176 } 1177 1178 SET_PARSERS = { 1179 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1180 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1181 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1182 "TRANSACTION": lambda self: self._parse_set_transaction(), 1183 } 1184 1185 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1186 1187 TYPE_LITERAL_PARSERS = { 1188 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1189 } 1190 1191 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1192 1193 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1194 1195 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1196 1197 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1198 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1199 "ISOLATION": ( 1200 ("LEVEL", "REPEATABLE", "READ"), 1201 ("LEVEL", "READ", "COMMITTED"), 1202 ("LEVEL", "READ", "UNCOMMITTED"), 1203 ("LEVEL", "SERIALIZABLE"), 1204 ), 1205 "READ": ("WRITE", "ONLY"), 1206 } 1207 1208 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1209 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1210 ) 1211 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1212 1213 CREATE_SEQUENCE: OPTIONS_TYPE = { 1214 "SCALE": ("EXTEND", "NOEXTEND"), 1215 "SHARD": ("EXTEND", "NOEXTEND"), 1216 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1217 **dict.fromkeys( 1218 ( 1219 "SESSION", 1220 "GLOBAL", 1221 "KEEP", 1222 "NOKEEP", 1223 "ORDER", 1224 "NOORDER", 1225 "NOCACHE", 1226 "CYCLE", 1227 "NOCYCLE", 1228 "NOMINVALUE", 1229 "NOMAXVALUE", 1230 "NOSCALE", 1231 "NOSHARD", 1232 ), 1233 tuple(), 1234 ), 1235 } 1236 1237 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1238 1239 USABLES: OPTIONS_TYPE = dict.fromkeys( 1240 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1241 ) 1242 1243 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1244 1245 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1246 "TYPE": ("EVOLUTION",), 1247 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1248 } 1249 1250 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1251 1252 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1253 1254 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1255 "NOT": ("ENFORCED",), 1256 "MATCH": ( 1257 "FULL", 1258 "PARTIAL", 1259 "SIMPLE", 1260 ), 1261
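# An OPTIONS_TYPE table maps a leading keyword to the keyword sequences allowed to follow it
# (e.g. MATCH FULL / MATCH PARTIAL / MATCH SIMPLE above), with an empty tuple marking a
# standalone keyword; these tables are consumed by _parse_var_from_options().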
"INITIALLY": ("DEFERRED", "IMMEDIATE"), 1262 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1263 } 1264 1265 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1266 1267 CLONE_KEYWORDS = {"CLONE", "COPY"} 1268 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1269 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1270 1271 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1272 1273 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1274 1275 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1276 1277 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1278 1279 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1280 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1281 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1282 1283 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1284 1285 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1286 1287 ADD_CONSTRAINT_TOKENS = { 1288 TokenType.CONSTRAINT, 1289 TokenType.FOREIGN_KEY, 1290 TokenType.INDEX, 1291 TokenType.KEY, 1292 TokenType.PRIMARY_KEY, 1293 TokenType.UNIQUE, 1294 } 1295 1296 DISTINCT_TOKENS = {TokenType.DISTINCT} 1297 1298 NULL_TOKENS = {TokenType.NULL} 1299 1300 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1301 1302 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1303 1304 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1305 1306 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1307 1308 ODBC_DATETIME_LITERALS = { 1309 "d": exp.Date, 1310 "t": exp.Time, 1311 "ts": exp.Timestamp, 1312 } 1313 1314 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1315 1316 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1317 1318 # The style options for the DESCRIBE statement 1319 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1320 1321 OPERATION_MODIFIERS: t.Set[str] = set() 1322 1323 STRICT_CAST = True 1324 1325 PREFIXED_PIVOT_COLUMNS = False 1326 IDENTIFY_PIVOT_STRINGS = False 1327 1328 LOG_DEFAULTS_TO_LN = False 1329 1330 # Whether ADD is present for each column added by ALTER TABLE 1331 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1332 1333 # Whether the table sample clause expects CSV syntax 1334 TABLESAMPLE_CSV = False 1335 1336 # The default method used for table sampling 1337 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1338 1339 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1340 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1341 1342 # Whether the TRIM function expects the characters to trim as its first argument 1343 TRIM_PATTERN_FIRST = False 1344 1345 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1346 STRING_ALIASES = False 1347 1348 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1349 MODIFIERS_ATTACHED_TO_SET_OP = True 1350 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1351 1352 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1353 NO_PAREN_IF_COMMANDS = True 1354 1355 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1356 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1357 1358 # Whether the `:` operator is used to extract a value from a VARIANT column 1359 COLON_IS_VARIANT_EXTRACT = False 1360 1361 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1362 # If this is True and '(' is not found, the keyword will be treated as an identifier 1363 VALUES_FOLLOWED_BY_PAREN = True 1364 1365 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1366 SUPPORTS_IMPLICIT_UNNEST = False 1367 1368 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1369 INTERVAL_SPANS = True 1370 1371 # Whether a PARTITION clause can follow a table reference 1372 SUPPORTS_PARTITION_SELECTION = False 1373 1374 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1375 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1376 1377 __slots__ = ( 1378 "error_level", 1379 "error_message_context", 1380 "max_errors", 1381 "dialect", 1382 "sql", 1383 "errors", 1384 "_tokens", 1385 "_index", 1386 "_curr", 1387 "_next", 1388 "_prev", 1389 "_prev_comments", 1390 ) 1391 1392 # Autofilled 1393 SHOW_TRIE: t.Dict = {} 1394 SET_TRIE: t.Dict = {} 1395 1396 def __init__( 1397 self, 1398 error_level: t.Optional[ErrorLevel] = None, 1399 error_message_context: int = 100, 1400 max_errors: int = 3, 1401 dialect: DialectType = None, 1402 ): 1403 from sqlglot.dialects import Dialect 1404 1405 self.error_level = error_level or ErrorLevel.IMMEDIATE 1406 self.error_message_context = error_message_context 1407 self.max_errors = max_errors 1408 self.dialect = Dialect.get_or_raise(dialect) 1409 self.reset() 1410 1411 def reset(self): 1412 self.sql = "" 1413 self.errors = [] 1414 self._tokens = [] 1415 self._index = 0 1416 self._curr = None 1417 self._next = None 1418 self._prev = None 1419 self._prev_comments = None 1420 1421 def parse( 1422 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1423 ) -> t.List[t.Optional[exp.Expression]]: 1424 """ 1425 Parses a list of tokens and returns a list of syntax trees, one tree 1426 per parsed SQL statement. 1427 1428 Args: 1429 raw_tokens: The list of tokens. 1430 sql: The original SQL string, used to produce helpful debug messages. 1431 1432 Returns: 1433 The list of the produced syntax trees. 1434 """ 1435 return self._parse( 1436 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1437 ) 1438 1439 def parse_into( 1440 self, 1441 expression_types: exp.IntoType, 1442 raw_tokens: t.List[Token], 1443 sql: t.Optional[str] = None, 1444 ) -> t.List[t.Optional[exp.Expression]]: 1445 """ 1446 Parses a list of tokens into a given Expression type. If a collection of Expression 1447 types is given instead, this method will try to parse the token list into each one 1448 of them, stopping at the first for which the parsing succeeds. 1449 1450 Args: 1451 expression_types: The expression type(s) to try and parse the token list into. 1452 raw_tokens: The list of tokens. 1453 sql: The original SQL string, used to produce helpful debug messages. 1454 1455 Returns: 1456 The target Expression. 
1457 """ 1458 errors = [] 1459 for expression_type in ensure_list(expression_types): 1460 parser = self.EXPRESSION_PARSERS.get(expression_type) 1461 if not parser: 1462 raise TypeError(f"No parser registered for {expression_type}") 1463 1464 try: 1465 return self._parse(parser, raw_tokens, sql) 1466 except ParseError as e: 1467 e.errors[0]["into_expression"] = expression_type 1468 errors.append(e) 1469 1470 raise ParseError( 1471 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1472 errors=merge_errors(errors), 1473 ) from errors[-1] 1474 1475 def _parse( 1476 self, 1477 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1478 raw_tokens: t.List[Token], 1479 sql: t.Optional[str] = None, 1480 ) -> t.List[t.Optional[exp.Expression]]: 1481 self.reset() 1482 self.sql = sql or "" 1483 1484 total = len(raw_tokens) 1485 chunks: t.List[t.List[Token]] = [[]] 1486 1487 for i, token in enumerate(raw_tokens): 1488 if token.token_type == TokenType.SEMICOLON: 1489 if token.comments: 1490 chunks.append([token]) 1491 1492 if i < total - 1: 1493 chunks.append([]) 1494 else: 1495 chunks[-1].append(token) 1496 1497 expressions = [] 1498 1499 for tokens in chunks: 1500 self._index = -1 1501 self._tokens = tokens 1502 self._advance() 1503 1504 expressions.append(parse_method(self)) 1505 1506 if self._index < len(self._tokens): 1507 self.raise_error("Invalid expression / Unexpected token") 1508 1509 self.check_errors() 1510 1511 return expressions 1512 1513 def check_errors(self) -> None: 1514 """Logs or raises any found errors, depending on the chosen error level setting.""" 1515 if self.error_level == ErrorLevel.WARN: 1516 for error in self.errors: 1517 logger.error(str(error)) 1518 elif self.error_level == ErrorLevel.RAISE and self.errors: 1519 raise ParseError( 1520 concat_messages(self.errors, self.max_errors), 1521 errors=merge_errors(self.errors), 1522 ) 1523 1524 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1525 """ 1526 Appends an error in the list of recorded errors or raises it, depending on the chosen 1527 error level setting. 1528 """ 1529 token = token or self._curr or self._prev or Token.string("") 1530 start = token.start 1531 end = token.end + 1 1532 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1533 highlight = self.sql[start:end] 1534 end_context = self.sql[end : end + self.error_message_context] 1535 1536 error = ParseError.new( 1537 f"{message}. Line {token.line}, Col: {token.col}.\n" 1538 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1539 description=message, 1540 line=token.line, 1541 col=token.col, 1542 start_context=start_context, 1543 highlight=highlight, 1544 end_context=end_context, 1545 ) 1546 1547 if self.error_level == ErrorLevel.IMMEDIATE: 1548 raise error 1549 1550 self.errors.append(error) 1551 1552 def expression( 1553 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1554 ) -> E: 1555 """ 1556 Creates a new, validated Expression. 1557 1558 Args: 1559 exp_class: The expression class to instantiate. 1560 comments: An optional list of comments to attach to the expression. 1561 kwargs: The arguments to set for the expression along with their respective values. 1562 1563 Returns: 1564 The target expression. 
1565 """ 1566 instance = exp_class(**kwargs) 1567 instance.add_comments(comments) if comments else self._add_comments(instance) 1568 return self.validate_expression(instance) 1569 1570 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1571 if expression and self._prev_comments: 1572 expression.add_comments(self._prev_comments) 1573 self._prev_comments = None 1574 1575 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1576 """ 1577 Validates an Expression, making sure that all its mandatory arguments are set. 1578 1579 Args: 1580 expression: The expression to validate. 1581 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1582 1583 Returns: 1584 The validated expression. 1585 """ 1586 if self.error_level != ErrorLevel.IGNORE: 1587 for error_message in expression.error_messages(args): 1588 self.raise_error(error_message) 1589 1590 return expression 1591 1592 def _find_sql(self, start: Token, end: Token) -> str: 1593 return self.sql[start.start : end.end + 1] 1594 1595 def _is_connected(self) -> bool: 1596 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1597 1598 def _advance(self, times: int = 1) -> None: 1599 self._index += times 1600 self._curr = seq_get(self._tokens, self._index) 1601 self._next = seq_get(self._tokens, self._index + 1) 1602 1603 if self._index > 0: 1604 self._prev = self._tokens[self._index - 1] 1605 self._prev_comments = self._prev.comments 1606 else: 1607 self._prev = None 1608 self._prev_comments = None 1609 1610 def _retreat(self, index: int) -> None: 1611 if index != self._index: 1612 self._advance(index - self._index) 1613 1614 def _warn_unsupported(self) -> None: 1615 if len(self._tokens) <= 1: 1616 return 1617 1618 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1619 # interested in emitting a warning for the one being currently processed. 1620 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1621 1622 logger.warning( 1623 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1624 ) 1625 1626 def _parse_command(self) -> exp.Command: 1627 self._warn_unsupported() 1628 return self.expression( 1629 exp.Command, 1630 comments=self._prev_comments, 1631 this=self._prev.text.upper(), 1632 expression=self._parse_string(), 1633 ) 1634 1635 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1636 """ 1637 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1639 solve this by setting & resetting the parser state accordingly. 1640 """ 1641 index = self._index 1642 error_level = self.error_level 1643 1644 self.error_level = ErrorLevel.IMMEDIATE 1645 try: 1646 this = parse_method() 1647 except ParseError: 1648 this = None 1649 finally: 1650 if not this or retreat: 1651 self._retreat(index) 1652 self.error_level = error_level 1653 1654 return this 1655 1656 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1657 start = self._prev 1658 exists = self._parse_exists() if allow_exists else None 1659 1660 self._match(TokenType.ON) 1661 1662 materialized = self._match_text_seq("MATERIALIZED") 1663 kind = self._match_set(self.CREATABLES) and self._prev 1664 if not kind: 1665 return self._parse_as_command(start) 1666 1667 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1668 this = self._parse_user_defined_function(kind=kind.token_type) 1669 elif kind.token_type == TokenType.TABLE: 1670 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1671 elif kind.token_type == TokenType.COLUMN: 1672 this = self._parse_column() 1673 else: 1674 this = self._parse_id_var() 1675 1676 self._match(TokenType.IS) 1677 1678 return self.expression( 1679 exp.Comment, 1680 this=this, 1681 kind=kind.text, 1682 expression=self._parse_string(), 1683 exists=exists, 1684 materialized=materialized, 1685 ) 1686 1687 def _parse_to_table( 1688 self, 1689 ) -> exp.ToTableProperty: 1690 table = self._parse_table_parts(schema=True) 1691 return self.expression(exp.ToTableProperty, this=table) 1692 1693 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1694 def _parse_ttl(self) -> exp.Expression: 1695 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1696 this = self._parse_bitwise() 1697 1698 if self._match_text_seq("DELETE"): 1699 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1700 if self._match_text_seq("RECOMPRESS"): 1701 return self.expression( 1702 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1703 ) 1704 if self._match_text_seq("TO", "DISK"): 1705 return self.expression( 1706 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1707 ) 1708 if self._match_text_seq("TO", "VOLUME"): 1709 return self.expression( 1710 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1711 ) 1712 1713 return this 1714 1715 expressions = self._parse_csv(_parse_ttl_action) 1716 where = self._parse_where() 1717 group = self._parse_group() 1718 1719 aggregates = None 1720 if group and self._match(TokenType.SET): 1721 aggregates = self._parse_csv(self._parse_set_item) 1722 1723 return self.expression( 1724 exp.MergeTreeTTL, 1725 expressions=expressions, 1726 where=where, 1727 group=group, 1728 aggregates=aggregates, 1729 ) 1730 1731 def _parse_statement(self) -> t.Optional[exp.Expression]: 1732 if self._curr is None: 1733 return None 1734 1735 if self._match_set(self.STATEMENT_PARSERS): 1736 comments = self._prev_comments 1737 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1738 stmt.add_comments(comments, prepend=True) 1739 return stmt 1740 1741 if self._match_set(self.dialect.tokenizer.COMMANDS): 1742 return self._parse_command() 1743 1744 expression = self._parse_expression() 1745 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1746 return self._parse_query_modifiers(expression)
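# A minimal sketch (assuming only the Tokenizer/Parser API shown in this module) of driving
# this entry point by hand; the top-level sqlglot.parse() helper wraps the same
# tokenize-then-parse flow:
#
#     from sqlglot import exp
#     from sqlglot.parser import Parser
#     from sqlglot.tokens import Tokenizer
#
#     sql = "SELECT a FROM t WHERE b > 1"
#     expressions = Parser().parse(Tokenizer().tokenize(sql), sql)  # one tree per statement
#     assert isinstance(expressions[0], exp.Select)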
1747 1748 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1749 start = self._prev 1750 temporary = self._match(TokenType.TEMPORARY) 1751 materialized = self._match_text_seq("MATERIALIZED") 1752 1753 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1754 if not kind: 1755 return self._parse_as_command(start) 1756 1757 concurrently = self._match_text_seq("CONCURRENTLY") 1758 if_exists = exists or self._parse_exists() 1759 1760 if kind == "COLUMN": 1761 this = self._parse_column() 1762 else: 1763 this = self._parse_table_parts( 1764 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1765 ) 1766 1767 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1768 1769 if self._match(TokenType.L_PAREN, advance=False): 1770 expressions = self._parse_wrapped_csv(self._parse_types) 1771 else: 1772 expressions = None 1773 1774 return self.expression( 1775 exp.Drop, 1776 exists=if_exists, 1777 this=this, 1778 expressions=expressions, 1779 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1780 temporary=temporary, 1781 materialized=materialized, 1782 cascade=self._match_text_seq("CASCADE"), 1783 constraints=self._match_text_seq("CONSTRAINTS"), 1784 purge=self._match_text_seq("PURGE"), 1785 cluster=cluster, 1786 concurrently=concurrently, 1787 ) 1788 1789 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1790 return ( 1791 self._match_text_seq("IF") 1792 and (not not_ or self._match(TokenType.NOT)) 1793 and self._match(TokenType.EXISTS) 1794 ) 1795 1796 def _parse_create(self) -> exp.Create | exp.Command: 1797 # Note: this can't be None because we've matched a statement parser 1798 start = self._prev 1799 1800 replace = ( 1801 start.token_type == TokenType.REPLACE 1802 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1803 or self._match_pair(TokenType.OR, TokenType.ALTER) 1804 ) 1805 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1806 1807 unique = self._match(TokenType.UNIQUE) 1808 1809 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1810 clustered = True 1811 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1812 "COLUMNSTORE" 1813 ): 1814 clustered = False 1815 else: 1816 clustered = None 1817 1818 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1819 self._advance() 1820 1821 properties = None 1822 create_token = self._match_set(self.CREATABLES) and self._prev 1823 1824 if not create_token: 1825 # exp.Properties.Location.POST_CREATE 1826 properties = self._parse_properties() 1827 create_token = self._match_set(self.CREATABLES) and self._prev 1828 1829 if not properties or not create_token: 1830 return self._parse_as_command(start) 1831 1832 concurrently = self._match_text_seq("CONCURRENTLY") 1833 exists = self._parse_exists(not_=True) 1834 this = None 1835 expression: t.Optional[exp.Expression] = None 1836 indexes = None 1837 no_schema_binding = None 1838 begin = None 1839 end = None 1840 clone = None 1841 1842 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1843 nonlocal properties 1844 if properties and temp_props: 1845 properties.expressions.extend(temp_props.expressions) 1846 elif temp_props: 1847 properties = temp_props 1848 1849 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1850 this = self._parse_user_defined_function(kind=create_token.token_type) 1851 1852 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1853 
extend_props(self._parse_properties()) 1854 1855 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1856 extend_props(self._parse_properties()) 1857 1858 if not expression: 1859 if self._match(TokenType.COMMAND): 1860 expression = self._parse_as_command(self._prev) 1861 else: 1862 begin = self._match(TokenType.BEGIN) 1863 return_ = self._match_text_seq("RETURN") 1864 1865 if self._match(TokenType.STRING, advance=False): 1866 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1867 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1868 expression = self._parse_string() 1869 extend_props(self._parse_properties()) 1870 else: 1871 expression = self._parse_user_defined_function_expression() 1872 1873 end = self._match_text_seq("END") 1874 1875 if return_: 1876 expression = self.expression(exp.Return, this=expression) 1877 elif create_token.token_type == TokenType.INDEX: 1878 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1879 if not self._match(TokenType.ON): 1880 index = self._parse_id_var() 1881 anonymous = False 1882 else: 1883 index = None 1884 anonymous = True 1885 1886 this = self._parse_index(index=index, anonymous=anonymous) 1887 elif create_token.token_type in self.DB_CREATABLES: 1888 table_parts = self._parse_table_parts( 1889 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1890 ) 1891 1892 # exp.Properties.Location.POST_NAME 1893 self._match(TokenType.COMMA) 1894 extend_props(self._parse_properties(before=True)) 1895 1896 this = self._parse_schema(this=table_parts) 1897 1898 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1899 extend_props(self._parse_properties()) 1900 1901 self._match(TokenType.ALIAS) 1902 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1903 # exp.Properties.Location.POST_ALIAS 1904 extend_props(self._parse_properties()) 1905 1906 if create_token.token_type == TokenType.SEQUENCE: 1907 expression = self._parse_types() 1908 extend_props(self._parse_properties()) 1909 else: 1910 expression = self._parse_ddl_select() 1911 1912 if create_token.token_type == TokenType.TABLE: 1913 # exp.Properties.Location.POST_EXPRESSION 1914 extend_props(self._parse_properties()) 1915 1916 indexes = [] 1917 while True: 1918 index = self._parse_index() 1919 1920 # exp.Properties.Location.POST_INDEX 1921 extend_props(self._parse_properties()) 1922 if not index: 1923 break 1924 else: 1925 self._match(TokenType.COMMA) 1926 indexes.append(index) 1927 elif create_token.token_type == TokenType.VIEW: 1928 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1929 no_schema_binding = True 1930 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1931 extend_props(self._parse_properties()) 1932 1933 shallow = self._match_text_seq("SHALLOW") 1934 1935 if self._match_texts(self.CLONE_KEYWORDS): 1936 copy = self._prev.text.lower() == "copy" 1937 clone = self.expression( 1938 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1939 ) 1940 1941 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1942 return self._parse_as_command(start) 1943 1944 create_kind_text = create_token.text.upper() 1945 return self.expression( 1946 exp.Create, 1947 this=this, 1948 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1949 replace=replace, 1950 refresh=refresh, 1951 unique=unique, 1952 expression=expression, 
1953 exists=exists, 1954 properties=properties, 1955 indexes=indexes, 1956 no_schema_binding=no_schema_binding, 1957 begin=begin, 1958 end=end, 1959 clone=clone, 1960 concurrently=concurrently, 1961 clustered=clustered, 1962 ) 1963 1964 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1965 seq = exp.SequenceProperties() 1966 1967 options = [] 1968 index = self._index 1969 1970 while self._curr: 1971 self._match(TokenType.COMMA) 1972 if self._match_text_seq("INCREMENT"): 1973 self._match_text_seq("BY") 1974 self._match_text_seq("=") 1975 seq.set("increment", self._parse_term()) 1976 elif self._match_text_seq("MINVALUE"): 1977 seq.set("minvalue", self._parse_term()) 1978 elif self._match_text_seq("MAXVALUE"): 1979 seq.set("maxvalue", self._parse_term()) 1980 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1981 self._match_text_seq("=") 1982 seq.set("start", self._parse_term()) 1983 elif self._match_text_seq("CACHE"): 1984 # T-SQL allows empty CACHE which is initialized dynamically 1985 seq.set("cache", self._parse_number() or True) 1986 elif self._match_text_seq("OWNED", "BY"): 1987 # "OWNED BY NONE" is the default 1988 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1989 else: 1990 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1991 if opt: 1992 options.append(opt) 1993 else: 1994 break 1995 1996 seq.set("options", options if options else None) 1997 return None if self._index == index else seq 1998 1999 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2000 # only used for teradata currently 2001 self._match(TokenType.COMMA) 2002 2003 kwargs = { 2004 "no": self._match_text_seq("NO"), 2005 "dual": self._match_text_seq("DUAL"), 2006 "before": self._match_text_seq("BEFORE"), 2007 "default": self._match_text_seq("DEFAULT"), 2008 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2009 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2010 "after": self._match_text_seq("AFTER"), 2011 "minimum": self._match_texts(("MIN", "MINIMUM")), 2012 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2013 } 2014 2015 if self._match_texts(self.PROPERTY_PARSERS): 2016 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2017 try: 2018 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2019 except TypeError: 2020 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2021 2022 return None 2023 2024 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2025 return self._parse_wrapped_csv(self._parse_property) 2026 2027 def _parse_property(self) -> t.Optional[exp.Expression]: 2028 if self._match_texts(self.PROPERTY_PARSERS): 2029 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2030 2031 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2032 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2033 2034 if self._match_text_seq("COMPOUND", "SORTKEY"): 2035 return self._parse_sortkey(compound=True) 2036 2037 if self._match_text_seq("SQL", "SECURITY"): 2038 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2039 2040 index = self._index 2041 key = self._parse_column() 2042 2043 if not self._match(TokenType.EQ): 2044 self._retreat(index) 2045 return self._parse_sequence_properties() 2046 2047 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2048 if isinstance(key, exp.Column): 2049 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2050 2051 value = self._parse_bitwise() or self._parse_var(any_token=True) 2052 2053 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2054 if isinstance(value, exp.Column): 2055 value = exp.var(value.name) 2056 2057 return self.expression(exp.Property, this=key, value=value) 2058 2059 def _parse_stored(self) -> exp.FileFormatProperty: 2060 self._match(TokenType.ALIAS) 2061 2062 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2063 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2064 2065 return self.expression( 2066 exp.FileFormatProperty, 2067 this=( 2068 self.expression( 2069 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2070 ) 2071 if input_format or output_format 2072 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2073 ), 2074 ) 2075 2076 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2077 field = self._parse_field() 2078 if isinstance(field, exp.Identifier) and not field.quoted: 2079 field = exp.var(field) 2080 2081 return field 2082 2083 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2084 self._match(TokenType.EQ) 2085 self._match(TokenType.ALIAS) 2086 2087 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2088 2089 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2090 properties = [] 2091 while True: 2092 if before: 2093 prop = self._parse_property_before() 2094 else: 2095 prop = self._parse_property() 2096 if not prop: 2097 break 2098 for p in ensure_list(prop): 2099 properties.append(p) 2100 2101 if properties: 2102 return self.expression(exp.Properties, expressions=properties) 2103 2104 return None 2105 2106 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2107 return self.expression( 2108 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2109 ) 2110 2111 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2112 if self._match_texts(("DEFINER", "INVOKER")): 2113 security_specifier = self._prev.text.upper() 2114 return self.expression(exp.SecurityProperty, this=security_specifier) 2115 return None 2116 2117 def _parse_settings_property(self) -> exp.SettingsProperty: 2118 return self.expression( 2119 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2120 ) 2121 2122 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2123 if self._index >= 2: 2124 pre_volatile_token = self._tokens[self._index - 2] 2125 else: 2126 pre_volatile_token = None 2127 2128 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2129 return exp.VolatileProperty() 2130 2131 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2132 2133 def _parse_retention_period(self) -> exp.Var: 2134 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2135 number = self._parse_number() 2136 number_str = f"{number} " if number else "" 2137 unit = self._parse_var(any_token=True) 2138 return exp.var(f"{number_str}{unit}") 2139 2140 def _parse_system_versioning_property( 2141 self, with_: bool = False 2142 ) -> exp.WithSystemVersioningProperty: 2143 self._match(TokenType.EQ) 2144 prop = self.expression( 2145 exp.WithSystemVersioningProperty, 2146 **{ # type: ignore 2147 "on": 
True, 2148 "with": with_, 2149 }, 2150 ) 2151 2152 if self._match_text_seq("OFF"): 2153 prop.set("on", False) 2154 return prop 2155 2156 self._match(TokenType.ON) 2157 if self._match(TokenType.L_PAREN): 2158 while self._curr and not self._match(TokenType.R_PAREN): 2159 if self._match_text_seq("HISTORY_TABLE", "="): 2160 prop.set("this", self._parse_table_parts()) 2161 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2162 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2163 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2164 prop.set("retention_period", self._parse_retention_period()) 2165 2166 self._match(TokenType.COMMA) 2167 2168 return prop 2169 2170 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2171 self._match(TokenType.EQ) 2172 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2173 prop = self.expression(exp.DataDeletionProperty, on=on) 2174 2175 if self._match(TokenType.L_PAREN): 2176 while self._curr and not self._match(TokenType.R_PAREN): 2177 if self._match_text_seq("FILTER_COLUMN", "="): 2178 prop.set("filter_column", self._parse_column()) 2179 elif self._match_text_seq("RETENTION_PERIOD", "="): 2180 prop.set("retention_period", self._parse_retention_period()) 2181 2182 self._match(TokenType.COMMA) 2183 2184 return prop 2185 2186 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2187 kind = "HASH" 2188 expressions: t.Optional[t.List[exp.Expression]] = None 2189 if self._match_text_seq("BY", "HASH"): 2190 expressions = self._parse_wrapped_csv(self._parse_id_var) 2191 elif self._match_text_seq("BY", "RANDOM"): 2192 kind = "RANDOM" 2193 2194 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2195 buckets: t.Optional[exp.Expression] = None 2196 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2197 buckets = self._parse_number() 2198 2199 return self.expression( 2200 exp.DistributedByProperty, 2201 expressions=expressions, 2202 kind=kind, 2203 buckets=buckets, 2204 order=self._parse_order(), 2205 ) 2206 2207 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2208 self._match_text_seq("KEY") 2209 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2210 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2211 2212 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2213 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2214 prop = self._parse_system_versioning_property(with_=True) 2215 self._match_r_paren() 2216 return prop 2217 2218 if self._match(TokenType.L_PAREN, advance=False): 2219 return self._parse_wrapped_properties() 2220 2221 if self._match_text_seq("JOURNAL"): 2222 return self._parse_withjournaltable() 2223 2224 if self._match_texts(self.VIEW_ATTRIBUTES): 2225 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2226 2227 if self._match_text_seq("DATA"): 2228 return self._parse_withdata(no=False) 2229 elif self._match_text_seq("NO", "DATA"): 2230 return self._parse_withdata(no=True) 2231 2232 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2233 return self._parse_serde_properties(with_=True) 2234 2235 if self._match(TokenType.SCHEMA): 2236 return self.expression( 2237 exp.WithSchemaBindingProperty, 2238 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2239 ) 2240 2241 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2242 return self.expression( 2243 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2244 ) 2245 2246 if not self._next: 2247 return None 2248 2249 return self._parse_withisolatedloading() 2250 2251 def _parse_procedure_option(self) -> exp.Expression | None: 2252 if self._match_text_seq("EXECUTE", "AS"): 2253 return self.expression( 2254 exp.ExecuteAsProperty, 2255 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2256 or self._parse_string(), 2257 ) 2258 2259 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2260 2261 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2262 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2263 self._match(TokenType.EQ) 2264 2265 user = self._parse_id_var() 2266 self._match(TokenType.PARAMETER) 2267 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2268 2269 if not user or not host: 2270 return None 2271 2272 return exp.DefinerProperty(this=f"{user}@{host}") 2273 2274 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2275 self._match(TokenType.TABLE) 2276 self._match(TokenType.EQ) 2277 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2278 2279 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2280 return self.expression(exp.LogProperty, no=no) 2281 2282 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2283 return self.expression(exp.JournalProperty, **kwargs) 2284 2285 def _parse_checksum(self) -> exp.ChecksumProperty: 2286 self._match(TokenType.EQ) 2287 2288 on = None 2289 if self._match(TokenType.ON): 2290 on = True 2291 elif self._match_text_seq("OFF"): 2292 on = False 2293 2294 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2295 2296 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2297 return self.expression( 2298 exp.Cluster, 2299 expressions=( 2300 self._parse_wrapped_csv(self._parse_ordered) 2301 if wrapped 2302 else self._parse_csv(self._parse_ordered) 2303 ), 2304 ) 2305 2306 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2307 self._match_text_seq("BY") 2308 2309 self._match_l_paren() 2310 expressions = self._parse_csv(self._parse_column) 2311 self._match_r_paren() 2312 2313 if self._match_text_seq("SORTED", "BY"): 2314 self._match_l_paren() 2315 sorted_by = self._parse_csv(self._parse_ordered) 2316 self._match_r_paren() 2317 else: 2318 sorted_by = None 2319 2320 self._match(TokenType.INTO) 2321 buckets = self._parse_number() 2322 self._match_text_seq("BUCKETS") 2323 2324 return self.expression( 2325 exp.ClusteredByProperty, 2326 expressions=expressions, 2327 sorted_by=sorted_by, 2328 buckets=buckets, 2329 ) 2330 2331 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2332 if not self._match_text_seq("GRANTS"): 2333 self._retreat(self._index - 1) 2334 return None 2335 2336 return self.expression(exp.CopyGrantsProperty) 2337 2338 def _parse_freespace(self) -> exp.FreespaceProperty: 2339 self._match(TokenType.EQ) 2340 return self.expression( 2341 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2342 ) 2343 2344 def _parse_mergeblockratio( 2345 self, no: bool = False, default: bool = False 2346 ) -> exp.MergeBlockRatioProperty: 2347 if self._match(TokenType.EQ): 2348 return self.expression( 2349 exp.MergeBlockRatioProperty, 2350 this=self._parse_number(), 2351 percent=self._match(TokenType.PERCENT), 2352 ) 2353 2354 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2355 2356 def _parse_datablocksize( 2357 self, 2358 default: t.Optional[bool] = None, 2359 minimum: t.Optional[bool] = None, 2360 maximum: t.Optional[bool] = None, 2361 ) -> exp.DataBlocksizeProperty: 2362 self._match(TokenType.EQ) 2363 size = self._parse_number() 2364 2365 units = None 2366 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2367 units = self._prev.text 2368 2369 return self.expression( 2370 exp.DataBlocksizeProperty, 2371 size=size, 2372 units=units, 2373 default=default, 2374 minimum=minimum, 2375 maximum=maximum, 2376 ) 2377 2378 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2379 self._match(TokenType.EQ) 2380 always = self._match_text_seq("ALWAYS") 2381 manual = self._match_text_seq("MANUAL") 2382 never = self._match_text_seq("NEVER") 2383 default = self._match_text_seq("DEFAULT") 2384 2385 autotemp = None 2386 if self._match_text_seq("AUTOTEMP"): 2387 autotemp = self._parse_schema() 2388 2389 return self.expression( 2390 exp.BlockCompressionProperty, 2391 always=always, 2392 manual=manual, 2393 never=never, 2394 default=default, 2395 autotemp=autotemp, 2396 ) 2397 2398 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2399 index = self._index 2400 no = self._match_text_seq("NO") 2401 concurrent = self._match_text_seq("CONCURRENT") 2402 2403 if not self._match_text_seq("ISOLATED", "LOADING"): 2404 self._retreat(index) 2405 return None 2406 2407 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2408 return self.expression( 2409 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2410 ) 2411 2412 def _parse_locking(self) -> exp.LockingProperty: 2413 if self._match(TokenType.TABLE): 2414 kind = "TABLE" 2415 elif self._match(TokenType.VIEW): 2416 kind = "VIEW" 2417 elif self._match(TokenType.ROW): 2418 kind = "ROW" 2419 elif self._match_text_seq("DATABASE"): 2420 kind = "DATABASE" 2421 else: 2422 kind = None 2423 2424 if kind in ("DATABASE", "TABLE", "VIEW"): 2425 this = self._parse_table_parts() 2426 else: 2427 this = None 2428 2429 if self._match(TokenType.FOR): 2430 for_or_in = "FOR" 2431 elif self._match(TokenType.IN): 2432 for_or_in = "IN" 2433 else: 2434 for_or_in = None 2435 2436 if self._match_text_seq("ACCESS"): 2437 lock_type = "ACCESS" 2438 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2439 lock_type = "EXCLUSIVE" 2440 elif self._match_text_seq("SHARE"): 2441 lock_type = "SHARE" 2442 elif self._match_text_seq("READ"): 2443 lock_type = "READ" 2444 elif self._match_text_seq("WRITE"): 2445 lock_type = "WRITE" 2446 elif self._match_text_seq("CHECKSUM"): 2447 lock_type = "CHECKSUM" 2448 else: 2449 lock_type = None 2450 2451 override = self._match_text_seq("OVERRIDE") 2452 2453 return self.expression( 2454 exp.LockingProperty, 2455 this=this, 2456 kind=kind, 2457 for_or_in=for_or_in, 2458 lock_type=lock_type, 2459 override=override, 2460 ) 2461 2462 def _parse_partition_by(self) -> t.List[exp.Expression]: 2463 if self._match(TokenType.PARTITION_BY): 2464 return self._parse_csv(self._parse_assignment) 2465 return [] 2466 2467 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2468 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2469 if self._match_text_seq("MINVALUE"): 2470 return exp.var("MINVALUE") 2471 if self._match_text_seq("MAXVALUE"): 2472 return exp.var("MAXVALUE") 2473 return self._parse_bitwise() 2474 2475 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2476 expression = None 
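# The Postgres partition bound forms handled below, e.g.:
#     FOR VALUES IN (1, 2, 3)
#     FOR VALUES FROM (MINVALUE) TO (10)
#     FOR VALUES WITH (MODULUS 4, REMAINDER 0)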
2477 from_expressions = None 2478 to_expressions = None 2479 2480 if self._match(TokenType.IN): 2481 this = self._parse_wrapped_csv(self._parse_bitwise) 2482 elif self._match(TokenType.FROM): 2483 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2484 self._match_text_seq("TO") 2485 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2486 elif self._match_text_seq("WITH", "(", "MODULUS"): 2487 this = self._parse_number() 2488 self._match_text_seq(",", "REMAINDER") 2489 expression = self._parse_number() 2490 self._match_r_paren() 2491 else: 2492 self.raise_error("Failed to parse partition bound spec.") 2493 2494 return self.expression( 2495 exp.PartitionBoundSpec, 2496 this=this, 2497 expression=expression, 2498 from_expressions=from_expressions, 2499 to_expressions=to_expressions, 2500 ) 2501 2502 # https://www.postgresql.org/docs/current/sql-createtable.html 2503 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2504 if not self._match_text_seq("OF"): 2505 self._retreat(self._index - 1) 2506 return None 2507 2508 this = self._parse_table(schema=True) 2509 2510 if self._match(TokenType.DEFAULT): 2511 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2512 elif self._match_text_seq("FOR", "VALUES"): 2513 expression = self._parse_partition_bound_spec() 2514 else: 2515 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2516 2517 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2518 2519 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2520 self._match(TokenType.EQ) 2521 return self.expression( 2522 exp.PartitionedByProperty, 2523 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2524 ) 2525 2526 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2527 if self._match_text_seq("AND", "STATISTICS"): 2528 statistics = True 2529 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2530 statistics = False 2531 else: 2532 statistics = None 2533 2534 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2535 2536 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2537 if self._match_text_seq("SQL"): 2538 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2539 return None 2540 2541 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2542 if self._match_text_seq("SQL", "DATA"): 2543 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2544 return None 2545 2546 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2547 if self._match_text_seq("PRIMARY", "INDEX"): 2548 return exp.NoPrimaryIndexProperty() 2549 if self._match_text_seq("SQL"): 2550 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2551 return None 2552 2553 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2554 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2555 return exp.OnCommitProperty() 2556 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2557 return exp.OnCommitProperty(delete=True) 2558 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2559 2560 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2561 if self._match_text_seq("SQL", "DATA"): 2562 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2563 return None 2564 2565 def _parse_distkey(self) -> exp.DistKeyProperty: 2566 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2567 2568 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2569 table = self._parse_table(schema=True) 2570 2571 options = [] 2572 while self._match_texts(("INCLUDING", "EXCLUDING")): 2573 this = self._prev.text.upper() 2574 2575 id_var = self._parse_id_var() 2576 if not id_var: 2577 return None 2578 2579 options.append( 2580 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2581 ) 2582 2583 return self.expression(exp.LikeProperty, this=table, expressions=options) 2584 2585 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2586 return self.expression( 2587 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2588 ) 2589 2590 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2591 self._match(TokenType.EQ) 2592 return self.expression( 2593 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2594 ) 2595 2596 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2597 self._match_text_seq("WITH", "CONNECTION") 2598 return self.expression( 2599 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2600 ) 2601 2602 def _parse_returns(self) -> exp.ReturnsProperty: 2603 value: t.Optional[exp.Expression] 2604 null = None 2605 is_table = self._match(TokenType.TABLE) 2606 2607 if is_table: 2608 if self._match(TokenType.LT): 2609 value = self.expression( 2610 exp.Schema, 2611 this="TABLE", 2612 expressions=self._parse_csv(self._parse_struct_types), 2613 ) 2614 if not self._match(TokenType.GT): 2615 self.raise_error("Expecting >") 2616 else: 2617 value = self._parse_schema(exp.var("TABLE")) 2618 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2619 null = True 2620 value = None 2621 else: 2622 value = self._parse_types() 2623 2624 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2625 2626 def _parse_describe(self) -> exp.Describe: 2627 kind = self._match_set(self.CREATABLES) and self._prev.text 2628 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2629 if self._match(TokenType.DOT): 2630 style = None 2631 self._retreat(self._index - 2) 2632 2633 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2634 2635 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2636 this = self._parse_statement() 2637 else: 2638 this = self._parse_table(schema=True) 2639 2640 properties = self._parse_properties() 2641 expressions = properties.expressions if properties else None 2642 partition = self._parse_partition() 2643 return self.expression( 2644 exp.Describe, 2645 this=this, 2646 style=style, 2647 kind=kind, 2648 expressions=expressions, 2649 partition=partition, 2650 format=format, 2651 ) 2652 2653 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2654 kind = self._prev.text.upper() 2655 expressions = [] 2656 2657 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2658 if self._match(TokenType.WHEN): 2659 expression = self._parse_disjunction() 2660 self._match(TokenType.THEN) 2661 else: 2662 expression = None 2663 2664 else_ = self._match(TokenType.ELSE) 2665 2666 if not self._match(TokenType.INTO): 2667 return None 2668 2669 return self.expression( 2670 exp.ConditionalInsert, 2671 this=self.expression( 2672 exp.Insert, 2673 this=self._parse_table(schema=True), 2674 
expression=self._parse_derived_table_values(), 2675 ), 2676 expression=expression, 2677 else_=else_, 2678 ) 2679 2680 expression = parse_conditional_insert() 2681 while expression is not None: 2682 expressions.append(expression) 2683 expression = parse_conditional_insert() 2684 2685 return self.expression( 2686 exp.MultitableInserts, 2687 kind=kind, 2688 comments=comments, 2689 expressions=expressions, 2690 source=self._parse_table(), 2691 ) 2692 2693 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2694 comments = [] 2695 hint = self._parse_hint() 2696 overwrite = self._match(TokenType.OVERWRITE) 2697 ignore = self._match(TokenType.IGNORE) 2698 local = self._match_text_seq("LOCAL") 2699 alternative = None 2700 is_function = None 2701 2702 if self._match_text_seq("DIRECTORY"): 2703 this: t.Optional[exp.Expression] = self.expression( 2704 exp.Directory, 2705 this=self._parse_var_or_string(), 2706 local=local, 2707 row_format=self._parse_row_format(match_row=True), 2708 ) 2709 else: 2710 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2711 comments += ensure_list(self._prev_comments) 2712 return self._parse_multitable_inserts(comments) 2713 2714 if self._match(TokenType.OR): 2715 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2716 2717 self._match(TokenType.INTO) 2718 comments += ensure_list(self._prev_comments) 2719 self._match(TokenType.TABLE) 2720 is_function = self._match(TokenType.FUNCTION) 2721 2722 this = ( 2723 self._parse_table(schema=True, parse_partition=True) 2724 if not is_function 2725 else self._parse_function() 2726 ) 2727 2728 returning = self._parse_returning() 2729 2730 return self.expression( 2731 exp.Insert, 2732 comments=comments, 2733 hint=hint, 2734 is_function=is_function, 2735 this=this, 2736 stored=self._match_text_seq("STORED") and self._parse_stored(), 2737 by_name=self._match_text_seq("BY", "NAME"), 2738 exists=self._parse_exists(), 2739 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2740 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2741 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2742 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2743 conflict=self._parse_on_conflict(), 2744 returning=returning or self._parse_returning(), 2745 overwrite=overwrite, 2746 alternative=alternative, 2747 ignore=ignore, 2748 source=self._match(TokenType.TABLE) and self._parse_table(), 2749 ) 2750 2751 def _parse_kill(self) -> exp.Kill: 2752 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2753 2754 return self.expression( 2755 exp.Kill, 2756 this=self._parse_primary(), 2757 kind=kind, 2758 ) 2759 2760 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2761 conflict = self._match_text_seq("ON", "CONFLICT") 2762 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2763 2764 if not conflict and not duplicate: 2765 return None 2766 2767 conflict_keys = None 2768 constraint = None 2769 2770 if conflict: 2771 if self._match_text_seq("ON", "CONSTRAINT"): 2772 constraint = self._parse_id_var() 2773 elif self._match(TokenType.L_PAREN): 2774 conflict_keys = self._parse_csv(self._parse_id_var) 2775 self._match_r_paren() 2776 2777 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2778 if self._prev.token_type == TokenType.UPDATE: 2779 self._match(TokenType.SET) 2780 expressions = self._parse_csv(self._parse_equality) 2781 else: 2782 
expressions = None 2783 2784 return self.expression( 2785 exp.OnConflict, 2786 duplicate=duplicate, 2787 expressions=expressions, 2788 action=action, 2789 conflict_keys=conflict_keys, 2790 constraint=constraint, 2791 ) 2792 2793 def _parse_returning(self) -> t.Optional[exp.Returning]: 2794 if not self._match(TokenType.RETURNING): 2795 return None 2796 return self.expression( 2797 exp.Returning, 2798 expressions=self._parse_csv(self._parse_expression), 2799 into=self._match(TokenType.INTO) and self._parse_table_part(), 2800 ) 2801 2802 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2803 if not self._match(TokenType.FORMAT): 2804 return None 2805 return self._parse_row_format() 2806 2807 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2808 index = self._index 2809 with_ = with_ or self._match_text_seq("WITH") 2810 2811 if not self._match(TokenType.SERDE_PROPERTIES): 2812 self._retreat(index) 2813 return None 2814 return self.expression( 2815 exp.SerdeProperties, 2816 **{ # type: ignore 2817 "expressions": self._parse_wrapped_properties(), 2818 "with": with_, 2819 }, 2820 ) 2821 2822 def _parse_row_format( 2823 self, match_row: bool = False 2824 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2825 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2826 return None 2827 2828 if self._match_text_seq("SERDE"): 2829 this = self._parse_string() 2830 2831 serde_properties = self._parse_serde_properties() 2832 2833 return self.expression( 2834 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2835 ) 2836 2837 self._match_text_seq("DELIMITED") 2838 2839 kwargs = {} 2840 2841 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2842 kwargs["fields"] = self._parse_string() 2843 if self._match_text_seq("ESCAPED", "BY"): 2844 kwargs["escaped"] = self._parse_string() 2845 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2846 kwargs["collection_items"] = self._parse_string() 2847 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2848 kwargs["map_keys"] = self._parse_string() 2849 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2850 kwargs["lines"] = self._parse_string() 2851 if self._match_text_seq("NULL", "DEFINED", "AS"): 2852 kwargs["null"] = self._parse_string() 2853 2854 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2855 2856 def _parse_load(self) -> exp.LoadData | exp.Command: 2857 if self._match_text_seq("DATA"): 2858 local = self._match_text_seq("LOCAL") 2859 self._match_text_seq("INPATH") 2860 inpath = self._parse_string() 2861 overwrite = self._match(TokenType.OVERWRITE) 2862 self._match_pair(TokenType.INTO, TokenType.TABLE) 2863 2864 return self.expression( 2865 exp.LoadData, 2866 this=self._parse_table(schema=True), 2867 local=local, 2868 overwrite=overwrite, 2869 inpath=inpath, 2870 partition=self._parse_partition(), 2871 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2872 serde=self._match_text_seq("SERDE") and self._parse_string(), 2873 ) 2874 return self._parse_as_command(self._prev) 2875 2876 def _parse_delete(self) -> exp.Delete: 2877 # This handles MySQL's "Multiple-Table Syntax" 2878 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2879 tables = None 2880 if not self._match(TokenType.FROM, advance=False): 2881 tables = self._parse_csv(self._parse_table) or None 2882 2883 returning = self._parse_returning() 2884 2885 
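# Editor's sketch (illustrative, not part of the original source): the
# multiple-table DELETE handled above can be exercised via the public API, e.g.
# >>> import sqlglot
# >>> sqlglot.parse_one("DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql")
# which should produce an exp.Delete whose "tables" arg holds the t1 reference.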
return self.expression( 2886 exp.Delete, 2887 tables=tables, 2888 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2889 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2890 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2891 where=self._parse_where(), 2892 returning=returning or self._parse_returning(), 2893 limit=self._parse_limit(), 2894 ) 2895 2896 def _parse_update(self) -> exp.Update: 2897 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2898 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2899 returning = self._parse_returning() 2900 return self.expression( 2901 exp.Update, 2902 **{ # type: ignore 2903 "this": this, 2904 "expressions": expressions, 2905 "from": self._parse_from(joins=True), 2906 "where": self._parse_where(), 2907 "returning": returning or self._parse_returning(), 2908 "order": self._parse_order(), 2909 "limit": self._parse_limit(), 2910 }, 2911 ) 2912 2913 def _parse_uncache(self) -> exp.Uncache: 2914 if not self._match(TokenType.TABLE): 2915 self.raise_error("Expecting TABLE after UNCACHE") 2916 2917 return self.expression( 2918 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2919 ) 2920 2921 def _parse_cache(self) -> exp.Cache: 2922 lazy = self._match_text_seq("LAZY") 2923 self._match(TokenType.TABLE) 2924 table = self._parse_table(schema=True) 2925 2926 options = [] 2927 if self._match_text_seq("OPTIONS"): 2928 self._match_l_paren() 2929 k = self._parse_string() 2930 self._match(TokenType.EQ) 2931 v = self._parse_string() 2932 options = [k, v] 2933 self._match_r_paren() 2934 2935 self._match(TokenType.ALIAS) 2936 return self.expression( 2937 exp.Cache, 2938 this=table, 2939 lazy=lazy, 2940 options=options, 2941 expression=self._parse_select(nested=True), 2942 ) 2943 2944 def _parse_partition(self) -> t.Optional[exp.Partition]: 2945 if not self._match(TokenType.PARTITION): 2946 return None 2947 2948 return self.expression( 2949 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2950 ) 2951 2952 def _parse_value(self) -> t.Optional[exp.Tuple]: 2953 def _parse_value_expression() -> t.Optional[exp.Expression]: 2954 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 2955 return exp.var(self._prev.text.upper()) 2956 return self._parse_expression() 2957 2958 if self._match(TokenType.L_PAREN): 2959 expressions = self._parse_csv(_parse_value_expression) 2960 self._match_r_paren() 2961 return self.expression(exp.Tuple, expressions=expressions) 2962 2963 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
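# Editor's sketch (illustrative, not part of the original source): Spark, for
# instance, accepts the unparenthesized form, so something like
# >>> import sqlglot
# >>> sqlglot.parse_one("SELECT * FROM VALUES 1, 2 AS t(a)", read="spark")
# should yield one single-column exp.Tuple per row.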
2964 expression = self._parse_expression() 2965 if expression: 2966 return self.expression(exp.Tuple, expressions=[expression]) 2967 return None 2968 2969 def _parse_projections(self) -> t.List[exp.Expression]: 2970 return self._parse_expressions() 2971 2972 def _parse_select( 2973 self, 2974 nested: bool = False, 2975 table: bool = False, 2976 parse_subquery_alias: bool = True, 2977 parse_set_operation: bool = True, 2978 ) -> t.Optional[exp.Expression]: 2979 cte = self._parse_with() 2980 2981 if cte: 2982 this = self._parse_statement() 2983 2984 if not this: 2985 self.raise_error("Failed to parse any statement following CTE") 2986 return cte 2987 2988 if "with" in this.arg_types: 2989 this.set("with", cte) 2990 else: 2991 self.raise_error(f"{this.key} does not support CTE") 2992 this = cte 2993 2994 return this 2995 2996 # duckdb supports leading with FROM x 2997 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2998 2999 if self._match(TokenType.SELECT): 3000 comments = self._prev_comments 3001 3002 hint = self._parse_hint() 3003 3004 if self._next and not self._next.token_type == TokenType.DOT: 3005 all_ = self._match(TokenType.ALL) 3006 distinct = self._match_set(self.DISTINCT_TOKENS) 3007 else: 3008 all_, distinct = None, None 3009 3010 kind = ( 3011 self._match(TokenType.ALIAS) 3012 and self._match_texts(("STRUCT", "VALUE")) 3013 and self._prev.text.upper() 3014 ) 3015 3016 if distinct: 3017 distinct = self.expression( 3018 exp.Distinct, 3019 on=self._parse_value() if self._match(TokenType.ON) else None, 3020 ) 3021 3022 if all_ and distinct: 3023 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3024 3025 operation_modifiers = [] 3026 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3027 operation_modifiers.append(exp.var(self._prev.text.upper())) 3028 3029 limit = self._parse_limit(top=True) 3030 projections = self._parse_projections() 3031 3032 this = self.expression( 3033 exp.Select, 3034 kind=kind, 3035 hint=hint, 3036 distinct=distinct, 3037 expressions=projections, 3038 limit=limit, 3039 operation_modifiers=operation_modifiers or None, 3040 ) 3041 this.comments = comments 3042 3043 into = self._parse_into() 3044 if into: 3045 this.set("into", into) 3046 3047 if not from_: 3048 from_ = self._parse_from() 3049 3050 if from_: 3051 this.set("from", from_) 3052 3053 this = self._parse_query_modifiers(this) 3054 elif (table or nested) and self._match(TokenType.L_PAREN): 3055 if self._match(TokenType.PIVOT): 3056 this = self._parse_simplified_pivot() 3057 elif self._match(TokenType.FROM): 3058 this = exp.select("*").from_( 3059 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3060 ) 3061 else: 3062 this = ( 3063 self._parse_table() 3064 if table 3065 else self._parse_select(nested=True, parse_set_operation=False) 3066 ) 3067 3068 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3069 # in case a modifier (e.g. 
join) is following 3070 if table and isinstance(this, exp.Values) and this.alias: 3071 alias = this.args["alias"].pop() 3072 this = exp.Table(this=this, alias=alias) 3073 3074 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3075 3076 self._match_r_paren() 3077 3078 # We return early here so that the UNION isn't attached to the subquery by the 3079 # following call to _parse_set_operations, but instead becomes the parent node 3080 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3081 elif self._match(TokenType.VALUES, advance=False): 3082 this = self._parse_derived_table_values() 3083 elif from_: 3084 this = exp.select("*").from_(from_.this, copy=False) 3085 elif self._match(TokenType.SUMMARIZE): 3086 table = self._match(TokenType.TABLE) 3087 this = self._parse_select() or self._parse_string() or self._parse_table() 3088 return self.expression(exp.Summarize, this=this, table=table) 3089 elif self._match(TokenType.DESCRIBE): 3090 this = self._parse_describe() 3091 elif self._match_text_seq("STREAM"): 3092 this = self._parse_function() 3093 if this: 3094 this = self.expression(exp.Stream, this=this) 3095 else: 3096 self._retreat(self._index - 1) 3097 else: 3098 this = None 3099 3100 return self._parse_set_operations(this) if parse_set_operation else this 3101 3102 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3103 if not skip_with_token and not self._match(TokenType.WITH): 3104 return None 3105 3106 comments = self._prev_comments 3107 recursive = self._match(TokenType.RECURSIVE) 3108 3109 last_comments = None 3110 expressions = [] 3111 while True: 3112 expressions.append(self._parse_cte()) 3113 if last_comments: 3114 expressions[-1].add_comments(last_comments) 3115 3116 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3117 break 3118 else: 3119 self._match(TokenType.WITH) 3120 3121 last_comments = self._prev_comments 3122 3123 return self.expression( 3124 exp.With, comments=comments, expressions=expressions, recursive=recursive 3125 ) 3126 3127 def _parse_cte(self) -> exp.CTE: 3128 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3129 if not alias or not alias.this: 3130 self.raise_error("Expected CTE to have alias") 3131 3132 self._match(TokenType.ALIAS) 3133 comments = self._prev_comments 3134 3135 if self._match_text_seq("NOT", "MATERIALIZED"): 3136 materialized = False 3137 elif self._match_text_seq("MATERIALIZED"): 3138 materialized = True 3139 else: 3140 materialized = None 3141 3142 return self.expression( 3143 exp.CTE, 3144 this=self._parse_wrapped(self._parse_statement), 3145 alias=alias, 3146 materialized=materialized, 3147 comments=comments, 3148 ) 3149 3150 def _parse_table_alias( 3151 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3152 ) -> t.Optional[exp.TableAlias]: 3153 any_token = self._match(TokenType.ALIAS) 3154 alias = ( 3155 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3156 or self._parse_string_as_identifier() 3157 ) 3158 3159 index = self._index 3160 if self._match(TokenType.L_PAREN): 3161 columns = self._parse_csv(self._parse_function_parameter) 3162 self._match_r_paren() if columns else self._retreat(index) 3163 else: 3164 columns = None 3165 3166 if not alias and not columns: 3167 return None 3168 3169 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3170 3171 # We bubble up comments from the Identifier to the TableAlias 3172 if isinstance(alias, exp.Identifier): 3173 
table_alias.add_comments(alias.pop_comments()) 3174 3175 return table_alias 3176 3177 def _parse_subquery( 3178 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3179 ) -> t.Optional[exp.Subquery]: 3180 if not this: 3181 return None 3182 3183 return self.expression( 3184 exp.Subquery, 3185 this=this, 3186 pivots=self._parse_pivots(), 3187 alias=self._parse_table_alias() if parse_alias else None, 3188 sample=self._parse_table_sample(), 3189 ) 3190 3191 def _implicit_unnests_to_explicit(self, this: E) -> E: 3192 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3193 3194 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3195 for i, join in enumerate(this.args.get("joins") or []): 3196 table = join.this 3197 normalized_table = table.copy() 3198 normalized_table.meta["maybe_column"] = True 3199 normalized_table = _norm(normalized_table, dialect=self.dialect) 3200 3201 if isinstance(table, exp.Table) and not join.args.get("on"): 3202 if normalized_table.parts[0].name in refs: 3203 table_as_column = table.to_column() 3204 unnest = exp.Unnest(expressions=[table_as_column]) 3205 3206 # Table.to_column creates a parent Alias node that we want to convert to 3207 # a TableAlias and attach to the Unnest, so it matches the parser's output 3208 if isinstance(table.args.get("alias"), exp.TableAlias): 3209 table_as_column.replace(table_as_column.this) 3210 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3211 3212 table.replace(unnest) 3213 3214 refs.add(normalized_table.alias_or_name) 3215 3216 return this 3217 3218 def _parse_query_modifiers( 3219 self, this: t.Optional[exp.Expression] 3220 ) -> t.Optional[exp.Expression]: 3221 if isinstance(this, (exp.Query, exp.Table)): 3222 for join in self._parse_joins(): 3223 this.append("joins", join) 3224 for lateral in iter(self._parse_lateral, None): 3225 this.append("laterals", lateral) 3226 3227 while True: 3228 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3229 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3230 key, expression = parser(self) 3231 3232 if expression: 3233 this.set(key, expression) 3234 if key == "limit": 3235 offset = expression.args.pop("offset", None) 3236 3237 if offset: 3238 offset = exp.Offset(expression=offset) 3239 this.set("offset", offset) 3240 3241 limit_by_expressions = expression.expressions 3242 expression.set("expressions", None) 3243 offset.set("expressions", limit_by_expressions) 3244 continue 3245 break 3246 3247 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3248 this = self._implicit_unnests_to_explicit(this) 3249 3250 return this 3251 3252 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3253 start = self._curr 3254 while self._curr: 3255 self._advance() 3256 3257 end = self._tokens[self._index - 1] 3258 return exp.Hint(expressions=[self._find_sql(start, end)]) 3259 3260 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3261 return self._parse_function_call() 3262 3263 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3264 start_index = self._index 3265 should_fallback_to_string = False 3266 3267 hints = [] 3268 try: 3269 for hint in iter( 3270 lambda: self._parse_csv( 3271 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3272 ), 3273 [], 3274 ): 3275 hints.extend(hint) 3276 except ParseError: 3277 should_fallback_to_string = True 3278 3279 if should_fallback_to_string or self._curr: 3280 
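# (Editor's note, not in the original source) Either a ParseError was raised or
# tokens were left unconsumed, so rewind and capture the entire hint body as a
# single raw string expression instead.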
self._retreat(start_index) 3281 return self._parse_hint_fallback_to_string() 3282 3283 return self.expression(exp.Hint, expressions=hints) 3284 3285 def _parse_hint(self) -> t.Optional[exp.Hint]: 3286 if self._match(TokenType.HINT) and self._prev_comments: 3287 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3288 3289 return None 3290 3291 def _parse_into(self) -> t.Optional[exp.Into]: 3292 if not self._match(TokenType.INTO): 3293 return None 3294 3295 temp = self._match(TokenType.TEMPORARY) 3296 unlogged = self._match_text_seq("UNLOGGED") 3297 self._match(TokenType.TABLE) 3298 3299 return self.expression( 3300 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3301 ) 3302 3303 def _parse_from( 3304 self, joins: bool = False, skip_from_token: bool = False 3305 ) -> t.Optional[exp.From]: 3306 if not skip_from_token and not self._match(TokenType.FROM): 3307 return None 3308 3309 return self.expression( 3310 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3311 ) 3312 3313 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3314 return self.expression( 3315 exp.MatchRecognizeMeasure, 3316 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3317 this=self._parse_expression(), 3318 ) 3319 3320 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3321 if not self._match(TokenType.MATCH_RECOGNIZE): 3322 return None 3323 3324 self._match_l_paren() 3325 3326 partition = self._parse_partition_by() 3327 order = self._parse_order() 3328 3329 measures = ( 3330 self._parse_csv(self._parse_match_recognize_measure) 3331 if self._match_text_seq("MEASURES") 3332 else None 3333 ) 3334 3335 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3336 rows = exp.var("ONE ROW PER MATCH") 3337 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3338 text = "ALL ROWS PER MATCH" 3339 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3340 text += " SHOW EMPTY MATCHES" 3341 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3342 text += " OMIT EMPTY MATCHES" 3343 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3344 text += " WITH UNMATCHED ROWS" 3345 rows = exp.var(text) 3346 else: 3347 rows = None 3348 3349 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3350 text = "AFTER MATCH SKIP" 3351 if self._match_text_seq("PAST", "LAST", "ROW"): 3352 text += " PAST LAST ROW" 3353 elif self._match_text_seq("TO", "NEXT", "ROW"): 3354 text += " TO NEXT ROW" 3355 elif self._match_text_seq("TO", "FIRST"): 3356 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3357 elif self._match_text_seq("TO", "LAST"): 3358 text += f" TO LAST {self._advance_any().text}" # type: ignore 3359 after = exp.var(text) 3360 else: 3361 after = None 3362 3363 if self._match_text_seq("PATTERN"): 3364 self._match_l_paren() 3365 3366 if not self._curr: 3367 self.raise_error("Expecting )", self._curr) 3368 3369 paren = 1 3370 start = self._curr 3371 3372 while self._curr and paren > 0: 3373 if self._curr.token_type == TokenType.L_PAREN: 3374 paren += 1 3375 if self._curr.token_type == TokenType.R_PAREN: 3376 paren -= 1 3377 3378 end = self._prev 3379 self._advance() 3380 3381 if paren > 0: 3382 self.raise_error("Expecting )", self._curr) 3383 3384 pattern = exp.var(self._find_sql(start, end)) 3385 else: 3386 pattern = None 3387 3388 define = ( 3389 self._parse_csv(self._parse_name_as_expression) 3390 if self._match_text_seq("DEFINE") 3391 else None 3392 ) 3393 3394 
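# Editor's sketch (illustrative, not part of the original source): a minimal
# MATCH_RECOGNIZE query, assuming a dialect that supports the clause:
# >>> import sqlglot
# >>> sqlglot.parse_one(
# ...     "SELECT * FROM t MATCH_RECOGNIZE (PARTITION BY a ORDER BY b "
# ...     "PATTERN (x+) DEFINE x AS b > 0)",
# ...     read="snowflake",
# ... )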
self._match_r_paren() 3395 3396 return self.expression( 3397 exp.MatchRecognize, 3398 partition_by=partition, 3399 order=order, 3400 measures=measures, 3401 rows=rows, 3402 after=after, 3403 pattern=pattern, 3404 define=define, 3405 alias=self._parse_table_alias(), 3406 ) 3407 3408 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3409 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3410 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3411 cross_apply = False 3412 3413 if cross_apply is not None: 3414 this = self._parse_select(table=True) 3415 view = None 3416 outer = None 3417 elif self._match(TokenType.LATERAL): 3418 this = self._parse_select(table=True) 3419 view = self._match(TokenType.VIEW) 3420 outer = self._match(TokenType.OUTER) 3421 else: 3422 return None 3423 3424 if not this: 3425 this = ( 3426 self._parse_unnest() 3427 or self._parse_function() 3428 or self._parse_id_var(any_token=False) 3429 ) 3430 3431 while self._match(TokenType.DOT): 3432 this = exp.Dot( 3433 this=this, 3434 expression=self._parse_function() or self._parse_id_var(any_token=False), 3435 ) 3436 3437 if view: 3438 table = self._parse_id_var(any_token=False) 3439 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3440 table_alias: t.Optional[exp.TableAlias] = self.expression( 3441 exp.TableAlias, this=table, columns=columns 3442 ) 3443 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3444 # We move the alias from the lateral's child node to the lateral itself 3445 table_alias = this.args["alias"].pop() 3446 else: 3447 table_alias = self._parse_table_alias() 3448 3449 return self.expression( 3450 exp.Lateral, 3451 this=this, 3452 view=view, 3453 outer=outer, 3454 alias=table_alias, 3455 cross_apply=cross_apply, 3456 ) 3457 3458 def _parse_join_parts( 3459 self, 3460 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3461 return ( 3462 self._match_set(self.JOIN_METHODS) and self._prev, 3463 self._match_set(self.JOIN_SIDES) and self._prev, 3464 self._match_set(self.JOIN_KINDS) and self._prev, 3465 ) 3466 3467 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3468 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3469 this = self._parse_column() 3470 if isinstance(this, exp.Column): 3471 return this.this 3472 return this 3473 3474 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3475 3476 def _parse_join( 3477 self, skip_join_token: bool = False, parse_bracket: bool = False 3478 ) -> t.Optional[exp.Join]: 3479 if self._match(TokenType.COMMA): 3480 return self.expression(exp.Join, this=self._parse_table()) 3481 3482 index = self._index 3483 method, side, kind = self._parse_join_parts() 3484 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3485 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3486 3487 if not skip_join_token and not join: 3488 self._retreat(index) 3489 kind = None 3490 method = None 3491 side = None 3492 3493 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3494 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3495 3496 if not skip_join_token and not join and not outer_apply and not cross_apply: 3497 return None 3498 3499 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3500 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3501 kwargs["expressions"] = 
self._parse_csv( 3502 lambda: self._parse_table(parse_bracket=parse_bracket) 3503 ) 3504 3505 if method: 3506 kwargs["method"] = method.text 3507 if side: 3508 kwargs["side"] = side.text 3509 if kind: 3510 kwargs["kind"] = kind.text 3511 if hint: 3512 kwargs["hint"] = hint 3513 3514 if self._match(TokenType.MATCH_CONDITION): 3515 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3516 3517 if self._match(TokenType.ON): 3518 kwargs["on"] = self._parse_assignment() 3519 elif self._match(TokenType.USING): 3520 kwargs["using"] = self._parse_using_identifiers() 3521 elif ( 3522 not (outer_apply or cross_apply) 3523 and not isinstance(kwargs["this"], exp.Unnest) 3524 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3525 ): 3526 index = self._index 3527 joins: t.Optional[list] = list(self._parse_joins()) 3528 3529 if joins and self._match(TokenType.ON): 3530 kwargs["on"] = self._parse_assignment() 3531 elif joins and self._match(TokenType.USING): 3532 kwargs["using"] = self._parse_using_identifiers() 3533 else: 3534 joins = None 3535 self._retreat(index) 3536 3537 kwargs["this"].set("joins", joins if joins else None) 3538 3539 comments = [c for token in (method, side, kind) if token for c in token.comments] 3540 return self.expression(exp.Join, comments=comments, **kwargs) 3541 3542 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3543 this = self._parse_assignment() 3544 3545 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3546 return this 3547 3548 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3549 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3550 3551 return this 3552 3553 def _parse_index_params(self) -> exp.IndexParameters: 3554 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3555 3556 if self._match(TokenType.L_PAREN, advance=False): 3557 columns = self._parse_wrapped_csv(self._parse_with_operator) 3558 else: 3559 columns = None 3560 3561 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3562 partition_by = self._parse_partition_by() 3563 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3564 tablespace = ( 3565 self._parse_var(any_token=True) 3566 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3567 else None 3568 ) 3569 where = self._parse_where() 3570 3571 on = self._parse_field() if self._match(TokenType.ON) else None 3572 3573 return self.expression( 3574 exp.IndexParameters, 3575 using=using, 3576 columns=columns, 3577 include=include, 3578 partition_by=partition_by, 3579 where=where, 3580 with_storage=with_storage, 3581 tablespace=tablespace, 3582 on=on, 3583 ) 3584 3585 def _parse_index( 3586 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3587 ) -> t.Optional[exp.Index]: 3588 if index or anonymous: 3589 unique = None 3590 primary = None 3591 amp = None 3592 3593 self._match(TokenType.ON) 3594 self._match(TokenType.TABLE) # hive 3595 table = self._parse_table_parts(schema=True) 3596 else: 3597 unique = self._match(TokenType.UNIQUE) 3598 primary = self._match_text_seq("PRIMARY") 3599 amp = self._match_text_seq("AMP") 3600 3601 if not self._match(TokenType.INDEX): 3602 return None 3603 3604 index = self._parse_id_var() 3605 table = None 3606 3607 params = self._parse_index_params() 3608 3609 return self.expression( 3610 exp.Index, 3611 this=index, 3612 table=table, 3613 unique=unique, 3614 primary=primary, 3615 amp=amp, 3616 
params=params, 3617 ) 3618 3619 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3620 hints: t.List[exp.Expression] = [] 3621 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3622 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3623 hints.append( 3624 self.expression( 3625 exp.WithTableHint, 3626 expressions=self._parse_csv( 3627 lambda: self._parse_function() or self._parse_var(any_token=True) 3628 ), 3629 ) 3630 ) 3631 self._match_r_paren() 3632 else: 3633 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3634 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3635 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3636 3637 self._match_set((TokenType.INDEX, TokenType.KEY)) 3638 if self._match(TokenType.FOR): 3639 hint.set("target", self._advance_any() and self._prev.text.upper()) 3640 3641 hint.set("expressions", self._parse_wrapped_id_vars()) 3642 hints.append(hint) 3643 3644 return hints or None 3645 3646 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3647 return ( 3648 (not schema and self._parse_function(optional_parens=False)) 3649 or self._parse_id_var(any_token=False) 3650 or self._parse_string_as_identifier() 3651 or self._parse_placeholder() 3652 ) 3653 3654 def _parse_table_parts( 3655 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3656 ) -> exp.Table: 3657 catalog = None 3658 db = None 3659 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3660 3661 while self._match(TokenType.DOT): 3662 if catalog: 3663 # This allows nesting the table in arbitrarily many dot expressions if needed 3664 table = self.expression( 3665 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3666 ) 3667 else: 3668 catalog = db 3669 db = table 3670 # "" used for tsql FROM a..b case 3671 table = self._parse_table_part(schema=schema) or "" 3672 3673 if ( 3674 wildcard 3675 and self._is_connected() 3676 and (isinstance(table, exp.Identifier) or not table) 3677 and self._match(TokenType.STAR) 3678 ): 3679 if isinstance(table, exp.Identifier): 3680 table.args["this"] += "*" 3681 else: 3682 table = exp.Identifier(this="*") 3683 3684 # We bubble up comments from the Identifier to the Table 3685 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3686 3687 if is_db_reference: 3688 catalog = db 3689 db = table 3690 table = None 3691 3692 if not table and not is_db_reference: 3693 self.raise_error(f"Expected table name but got {self._curr}") 3694 if not db and is_db_reference: 3695 self.raise_error(f"Expected database name but got {self._curr}") 3696 3697 table = self.expression( 3698 exp.Table, 3699 comments=comments, 3700 this=table, 3701 db=db, 3702 catalog=catalog, 3703 ) 3704 3705 changes = self._parse_changes() 3706 if changes: 3707 table.set("changes", changes) 3708 3709 at_before = self._parse_historical_data() 3710 if at_before: 3711 table.set("when", at_before) 3712 3713 pivots = self._parse_pivots() 3714 if pivots: 3715 table.set("pivots", pivots) 3716 3717 return table 3718 3719 def _parse_table( 3720 self, 3721 schema: bool = False, 3722 joins: bool = False, 3723 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3724 parse_bracket: bool = False, 3725 is_db_reference: bool = False, 3726 parse_partition: bool = False, 3727 ) -> t.Optional[exp.Expression]: 3728 lateral = self._parse_lateral() 3729 if lateral: 3730 return lateral 3731 3732 unnest = 
self._parse_unnest() 3733 if unnest: 3734 return unnest 3735 3736 values = self._parse_derived_table_values() 3737 if values: 3738 return values 3739 3740 subquery = self._parse_select(table=True) 3741 if subquery: 3742 if not subquery.args.get("pivots"): 3743 subquery.set("pivots", self._parse_pivots()) 3744 return subquery 3745 3746 bracket = parse_bracket and self._parse_bracket(None) 3747 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3748 3749 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3750 self._parse_table 3751 ) 3752 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3753 3754 only = self._match(TokenType.ONLY) 3755 3756 this = t.cast( 3757 exp.Expression, 3758 bracket 3759 or rows_from 3760 or self._parse_bracket( 3761 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3762 ), 3763 ) 3764 3765 if only: 3766 this.set("only", only) 3767 3768 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3769 self._match_text_seq("*") 3770 3771 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3772 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3773 this.set("partition", self._parse_partition()) 3774 3775 if schema: 3776 return self._parse_schema(this=this) 3777 3778 version = self._parse_version() 3779 3780 if version: 3781 this.set("version", version) 3782 3783 if self.dialect.ALIAS_POST_TABLESAMPLE: 3784 this.set("sample", self._parse_table_sample()) 3785 3786 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3787 if alias: 3788 this.set("alias", alias) 3789 3790 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3791 return self.expression( 3792 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3793 ) 3794 3795 this.set("hints", self._parse_table_hints()) 3796 3797 if not this.args.get("pivots"): 3798 this.set("pivots", self._parse_pivots()) 3799 3800 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3801 this.set("sample", self._parse_table_sample()) 3802 3803 if joins: 3804 for join in self._parse_joins(): 3805 this.append("joins", join) 3806 3807 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3808 this.set("ordinality", True) 3809 this.set("alias", self._parse_table_alias()) 3810 3811 return this 3812 3813 def _parse_version(self) -> t.Optional[exp.Version]: 3814 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3815 this = "TIMESTAMP" 3816 elif self._match(TokenType.VERSION_SNAPSHOT): 3817 this = "VERSION" 3818 else: 3819 return None 3820 3821 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3822 kind = self._prev.text.upper() 3823 start = self._parse_bitwise() 3824 self._match_texts(("TO", "AND")) 3825 end = self._parse_bitwise() 3826 expression: t.Optional[exp.Expression] = self.expression( 3827 exp.Tuple, expressions=[start, end] 3828 ) 3829 elif self._match_text_seq("CONTAINED", "IN"): 3830 kind = "CONTAINED IN" 3831 expression = self.expression( 3832 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3833 ) 3834 elif self._match(TokenType.ALL): 3835 kind = "ALL" 3836 expression = None 3837 else: 3838 self._match_text_seq("AS", "OF") 3839 kind = "AS OF" 3840 expression = self._parse_type() 3841 3842 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3843 3844 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3845 # 
https://docs.snowflake.com/en/sql-reference/constructs/at-before 3846 index = self._index 3847 historical_data = None 3848 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3849 this = self._prev.text.upper() 3850 kind = ( 3851 self._match(TokenType.L_PAREN) 3852 and self._match_texts(self.HISTORICAL_DATA_KIND) 3853 and self._prev.text.upper() 3854 ) 3855 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3856 3857 if expression: 3858 self._match_r_paren() 3859 historical_data = self.expression( 3860 exp.HistoricalData, this=this, kind=kind, expression=expression 3861 ) 3862 else: 3863 self._retreat(index) 3864 3865 return historical_data 3866 3867 def _parse_changes(self) -> t.Optional[exp.Changes]: 3868 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3869 return None 3870 3871 information = self._parse_var(any_token=True) 3872 self._match_r_paren() 3873 3874 return self.expression( 3875 exp.Changes, 3876 information=information, 3877 at_before=self._parse_historical_data(), 3878 end=self._parse_historical_data(), 3879 ) 3880 3881 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3882 if not self._match(TokenType.UNNEST): 3883 return None 3884 3885 expressions = self._parse_wrapped_csv(self._parse_equality) 3886 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3887 3888 alias = self._parse_table_alias() if with_alias else None 3889 3890 if alias: 3891 if self.dialect.UNNEST_COLUMN_ONLY: 3892 if alias.args.get("columns"): 3893 self.raise_error("Unexpected extra column alias in unnest.") 3894 3895 alias.set("columns", [alias.this]) 3896 alias.set("this", None) 3897 3898 columns = alias.args.get("columns") or [] 3899 if offset and len(expressions) < len(columns): 3900 offset = columns.pop() 3901 3902 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3903 self._match(TokenType.ALIAS) 3904 offset = self._parse_id_var( 3905 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3906 ) or exp.to_identifier("offset") 3907 3908 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3909 3910 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3911 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3912 if not is_derived and not ( 3913 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3914 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3915 ): 3916 return None 3917 3918 expressions = self._parse_csv(self._parse_value) 3919 alias = self._parse_table_alias() 3920 3921 if is_derived: 3922 self._match_r_paren() 3923 3924 return self.expression( 3925 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3926 ) 3927 3928 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3929 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3930 as_modifier and self._match_text_seq("USING", "SAMPLE") 3931 ): 3932 return None 3933 3934 bucket_numerator = None 3935 bucket_denominator = None 3936 bucket_field = None 3937 percent = None 3938 size = None 3939 seed = None 3940 3941 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3942 matched_l_paren = self._match(TokenType.L_PAREN) 3943 3944 if self.TABLESAMPLE_CSV: 3945 num = None 3946 expressions = self._parse_csv(self._parse_primary) 3947 else: 3948 expressions = None 3949 num = ( 3950 self._parse_factor() 3951 if self._match(TokenType.NUMBER, advance=False) 3952 else self._parse_primary() or 
self._parse_placeholder() 3953 ) 3954 3955 if self._match_text_seq("BUCKET"): 3956 bucket_numerator = self._parse_number() 3957 self._match_text_seq("OUT", "OF") 3958 bucket_denominator = self._parse_number() 3959 self._match(TokenType.ON) 3960 bucket_field = self._parse_field() 3961 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3962 percent = num 3963 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3964 size = num 3965 else: 3966 percent = num 3967 3968 if matched_l_paren: 3969 self._match_r_paren() 3970 3971 if self._match(TokenType.L_PAREN): 3972 method = self._parse_var(upper=True) 3973 seed = self._match(TokenType.COMMA) and self._parse_number() 3974 self._match_r_paren() 3975 elif self._match_texts(("SEED", "REPEATABLE")): 3976 seed = self._parse_wrapped(self._parse_number) 3977 3978 if not method and self.DEFAULT_SAMPLING_METHOD: 3979 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3980 3981 return self.expression( 3982 exp.TableSample, 3983 expressions=expressions, 3984 method=method, 3985 bucket_numerator=bucket_numerator, 3986 bucket_denominator=bucket_denominator, 3987 bucket_field=bucket_field, 3988 percent=percent, 3989 size=size, 3990 seed=seed, 3991 ) 3992 3993 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3994 return list(iter(self._parse_pivot, None)) or None 3995 3996 def _parse_joins(self) -> t.Iterator[exp.Join]: 3997 return iter(self._parse_join, None) 3998 3999 # https://duckdb.org/docs/sql/statements/pivot 4000 def _parse_simplified_pivot(self) -> exp.Pivot: 4001 def _parse_on() -> t.Optional[exp.Expression]: 4002 this = self._parse_bitwise() 4003 return self._parse_in(this) if self._match(TokenType.IN) else this 4004 4005 this = self._parse_table() 4006 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4007 using = self._match(TokenType.USING) and self._parse_csv( 4008 lambda: self._parse_alias(self._parse_function()) 4009 ) 4010 group = self._parse_group() 4011 return self.expression( 4012 exp.Pivot, this=this, expressions=expressions, using=using, group=group 4013 ) 4014 4015 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4016 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4017 this = self._parse_select_or_expression() 4018 4019 self._match(TokenType.ALIAS) 4020 alias = self._parse_bitwise() 4021 if alias: 4022 if isinstance(alias, exp.Column) and not alias.db: 4023 alias = alias.this 4024 return self.expression(exp.PivotAlias, this=this, alias=alias) 4025 4026 return this 4027 4028 value = self._parse_column() 4029 4030 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4031 self.raise_error("Expecting IN (") 4032 4033 if self._match(TokenType.ANY): 4034 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4035 else: 4036 exprs = self._parse_csv(_parse_aliased_expression) 4037 4038 self._match_r_paren() 4039 return self.expression(exp.In, this=value, expressions=exprs) 4040 4041 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4042 index = self._index 4043 include_nulls = None 4044 4045 if self._match(TokenType.PIVOT): 4046 unpivot = False 4047 elif self._match(TokenType.UNPIVOT): 4048 unpivot = True 4049 4050 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4051 if self._match_text_seq("INCLUDE", "NULLS"): 4052 include_nulls = True 4053 elif self._match_text_seq("EXCLUDE", "NULLS"): 4054 include_nulls = False 4055 else: 4056 return None 4057 4058 expressions
= [] 4059 4060 if not self._match(TokenType.L_PAREN): 4061 self._retreat(index) 4062 return None 4063 4064 if unpivot: 4065 expressions = self._parse_csv(self._parse_column) 4066 else: 4067 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4068 4069 if not expressions: 4070 self.raise_error("Failed to parse PIVOT's aggregation list") 4071 4072 if not self._match(TokenType.FOR): 4073 self.raise_error("Expecting FOR") 4074 4075 field = self._parse_pivot_in() 4076 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4077 self._parse_bitwise 4078 ) 4079 4080 self._match_r_paren() 4081 4082 pivot = self.expression( 4083 exp.Pivot, 4084 expressions=expressions, 4085 field=field, 4086 unpivot=unpivot, 4087 include_nulls=include_nulls, 4088 default_on_null=default_on_null, 4089 ) 4090 4091 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4092 pivot.set("alias", self._parse_table_alias()) 4093 4094 if not unpivot: 4095 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4096 4097 columns: t.List[exp.Expression] = [] 4098 for fld in pivot.args["field"].expressions: 4099 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4100 for name in names: 4101 if self.PREFIXED_PIVOT_COLUMNS: 4102 name = f"{name}_{field_name}" if name else field_name 4103 else: 4104 name = f"{field_name}_{name}" if name else field_name 4105 4106 columns.append(exp.to_identifier(name)) 4107 4108 pivot.set("columns", columns) 4109 4110 return pivot 4111 4112 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4113 return [agg.alias for agg in aggregations] 4114 4115 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4116 if not skip_where_token and not self._match(TokenType.PREWHERE): 4117 return None 4118 4119 return self.expression( 4120 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4121 ) 4122 4123 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4124 if not skip_where_token and not self._match(TokenType.WHERE): 4125 return None 4126 4127 return self.expression( 4128 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4129 ) 4130 4131 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4132 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4133 return None 4134 4135 elements: t.Dict[str, t.Any] = defaultdict(list) 4136 4137 if self._match(TokenType.ALL): 4138 elements["all"] = True 4139 elif self._match(TokenType.DISTINCT): 4140 elements["all"] = False 4141 4142 while True: 4143 index = self._index 4144 4145 elements["expressions"].extend( 4146 self._parse_csv( 4147 lambda: None 4148 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4149 else self._parse_assignment() 4150 ) 4151 ) 4152 4153 before_with_index = self._index 4154 with_prefix = self._match(TokenType.WITH) 4155 4156 if self._match(TokenType.ROLLUP): 4157 elements["rollup"].append( 4158 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4159 ) 4160 elif self._match(TokenType.CUBE): 4161 elements["cube"].append( 4162 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4163 ) 4164 elif self._match(TokenType.GROUPING_SETS): 4165 elements["grouping_sets"].append( 4166 self.expression( 4167 exp.GroupingSets, 4168 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4169 ) 4170 ) 
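# Editor's sketch (illustrative, not part of the original source): the branch
# above handles standard grouping sets, e.g.
# >>> import sqlglot
# >>> sqlglot.parse_one("SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a), (b), ())")
# while the TOTALS branch below covers ClickHouse's GROUP BY ... WITH TOTALS.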
4171 elif self._match_text_seq("TOTALS"): 4172 elements["totals"] = True # type: ignore 4173 4174 if before_with_index <= self._index <= before_with_index + 1: 4175 self._retreat(before_with_index) 4176 break 4177 4178 if index == self._index: 4179 break 4180 4181 return self.expression(exp.Group, **elements) # type: ignore 4182 4183 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4184 return self.expression( 4185 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4186 ) 4187 4188 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4189 if self._match(TokenType.L_PAREN): 4190 grouping_set = self._parse_csv(self._parse_column) 4191 self._match_r_paren() 4192 return self.expression(exp.Tuple, expressions=grouping_set) 4193 4194 return self._parse_column() 4195 4196 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4197 if not skip_having_token and not self._match(TokenType.HAVING): 4198 return None 4199 return self.expression(exp.Having, this=self._parse_assignment()) 4200 4201 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4202 if not self._match(TokenType.QUALIFY): 4203 return None 4204 return self.expression(exp.Qualify, this=self._parse_assignment()) 4205 4206 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4207 if skip_start_token: 4208 start = None 4209 elif self._match(TokenType.START_WITH): 4210 start = self._parse_assignment() 4211 else: 4212 return None 4213 4214 self._match(TokenType.CONNECT_BY) 4215 nocycle = self._match_text_seq("NOCYCLE") 4216 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4217 exp.Prior, this=self._parse_bitwise() 4218 ) 4219 connect = self._parse_assignment() 4220 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4221 4222 if not start and self._match(TokenType.START_WITH): 4223 start = self._parse_assignment() 4224 4225 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4226 4227 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4228 this = self._parse_id_var(any_token=True) 4229 if self._match(TokenType.ALIAS): 4230 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4231 return this 4232 4233 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4234 if self._match_text_seq("INTERPOLATE"): 4235 return self._parse_wrapped_csv(self._parse_name_as_expression) 4236 return None 4237 4238 def _parse_order( 4239 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4240 ) -> t.Optional[exp.Expression]: 4241 siblings = None 4242 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4243 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4244 return this 4245 4246 siblings = True 4247 4248 return self.expression( 4249 exp.Order, 4250 this=this, 4251 expressions=self._parse_csv(self._parse_ordered), 4252 siblings=siblings, 4253 ) 4254 4255 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4256 if not self._match(token): 4257 return None 4258 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4259 4260 def _parse_ordered( 4261 self, parse_method: t.Optional[t.Callable] = None 4262 ) -> t.Optional[exp.Ordered]: 4263 this = parse_method() if parse_method else self._parse_assignment() 4264 if not this: 4265 return None 4266 4267 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4268 this = 
exp.var("ALL") 4269 4270 asc = self._match(TokenType.ASC) 4271 desc = self._match(TokenType.DESC) or (asc and False) 4272 4273 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4274 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4275 4276 nulls_first = is_nulls_first or False 4277 explicitly_null_ordered = is_nulls_first or is_nulls_last 4278 4279 if ( 4280 not explicitly_null_ordered 4281 and ( 4282 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4283 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4284 ) 4285 and self.dialect.NULL_ORDERING != "nulls_are_last" 4286 ): 4287 nulls_first = True 4288 4289 if self._match_text_seq("WITH", "FILL"): 4290 with_fill = self.expression( 4291 exp.WithFill, 4292 **{ # type: ignore 4293 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4294 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4295 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4296 "interpolate": self._parse_interpolate(), 4297 }, 4298 ) 4299 else: 4300 with_fill = None 4301 4302 return self.expression( 4303 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4304 ) 4305 4306 def _parse_limit( 4307 self, 4308 this: t.Optional[exp.Expression] = None, 4309 top: bool = False, 4310 skip_limit_token: bool = False, 4311 ) -> t.Optional[exp.Expression]: 4312 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4313 comments = self._prev_comments 4314 if top: 4315 limit_paren = self._match(TokenType.L_PAREN) 4316 expression = self._parse_term() if limit_paren else self._parse_number() 4317 4318 if limit_paren: 4319 self._match_r_paren() 4320 else: 4321 expression = self._parse_term() 4322 4323 if self._match(TokenType.COMMA): 4324 offset = expression 4325 expression = self._parse_term() 4326 else: 4327 offset = None 4328 4329 limit_exp = self.expression( 4330 exp.Limit, 4331 this=this, 4332 expression=expression, 4333 offset=offset, 4334 comments=comments, 4335 expressions=self._parse_limit_by(), 4336 ) 4337 4338 return limit_exp 4339 4340 if self._match(TokenType.FETCH): 4341 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4342 direction = self._prev.text.upper() if direction else "FIRST" 4343 4344 count = self._parse_field(tokens=self.FETCH_TOKENS) 4345 percent = self._match(TokenType.PERCENT) 4346 4347 self._match_set((TokenType.ROW, TokenType.ROWS)) 4348 4349 only = self._match_text_seq("ONLY") 4350 with_ties = self._match_text_seq("WITH", "TIES") 4351 4352 if only and with_ties: 4353 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4354 4355 return self.expression( 4356 exp.Fetch, 4357 direction=direction, 4358 count=count, 4359 percent=percent, 4360 with_ties=with_ties, 4361 ) 4362 4363 return this 4364 4365 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4366 if not self._match(TokenType.OFFSET): 4367 return this 4368 4369 count = self._parse_term() 4370 self._match_set((TokenType.ROW, TokenType.ROWS)) 4371 4372 return self.expression( 4373 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4374 ) 4375 4376 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4377 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4378 4379 def _parse_locks(self) -> t.List[exp.Lock]: 4380 locks = [] 4381 while True: 4382 if self._match_text_seq("FOR", "UPDATE"): 4383 update = True 4384 elif self._match_text_seq("FOR", "SHARE") 
or self._match_text_seq( 4385 "LOCK", "IN", "SHARE", "MODE" 4386 ): 4387 update = False 4388 else: 4389 break 4390 4391 expressions = None 4392 if self._match_text_seq("OF"): 4393 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4394 4395 wait: t.Optional[bool | exp.Expression] = None 4396 if self._match_text_seq("NOWAIT"): 4397 wait = True 4398 elif self._match_text_seq("WAIT"): 4399 wait = self._parse_primary() 4400 elif self._match_text_seq("SKIP", "LOCKED"): 4401 wait = False 4402 4403 locks.append( 4404 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4405 ) 4406 4407 return locks 4408 4409 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4410 while this and self._match_set(self.SET_OPERATIONS): 4411 token_type = self._prev.token_type 4412 4413 if token_type == TokenType.UNION: 4414 operation: t.Type[exp.SetOperation] = exp.Union 4415 elif token_type == TokenType.EXCEPT: 4416 operation = exp.Except 4417 else: 4418 operation = exp.Intersect 4419 4420 comments = self._prev.comments 4421 4422 if self._match(TokenType.DISTINCT): 4423 distinct: t.Optional[bool] = True 4424 elif self._match(TokenType.ALL): 4425 distinct = False 4426 else: 4427 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4428 if distinct is None: 4429 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4430 4431 by_name = self._match_text_seq("BY", "NAME") 4432 expression = self._parse_select(nested=True, parse_set_operation=False) 4433 4434 this = self.expression( 4435 operation, 4436 comments=comments, 4437 this=this, 4438 distinct=distinct, 4439 by_name=by_name, 4440 expression=expression, 4441 ) 4442 4443 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4444 expression = this.expression 4445 4446 if expression: 4447 for arg in self.SET_OP_MODIFIERS: 4448 expr = expression.args.get(arg) 4449 if expr: 4450 this.set(arg, expr.pop()) 4451 4452 return this 4453 4454 def _parse_expression(self) -> t.Optional[exp.Expression]: 4455 return self._parse_alias(self._parse_assignment()) 4456 4457 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4458 this = self._parse_disjunction() 4459 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4460 # This allows us to parse <non-identifier token> := <expr> 4461 this = exp.column( 4462 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4463 ) 4464 4465 while self._match_set(self.ASSIGNMENT): 4466 if isinstance(this, exp.Column) and len(this.parts) == 1: 4467 this = this.this 4468 4469 this = self.expression( 4470 self.ASSIGNMENT[self._prev.token_type], 4471 this=this, 4472 comments=self._prev_comments, 4473 expression=self._parse_assignment(), 4474 ) 4475 4476 return this 4477 4478 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4479 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4480 4481 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4482 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4483 4484 def _parse_equality(self) -> t.Optional[exp.Expression]: 4485 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4486 4487 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4488 return self._parse_tokens(self._parse_range, self.COMPARISON) 4489 4490 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4491 this = this or self._parse_bitwise() 4492 negate = 
self._match(TokenType.NOT) 4493 4494 if self._match_set(self.RANGE_PARSERS): 4495 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4496 if not expression: 4497 return this 4498 4499 this = expression 4500 elif self._match(TokenType.ISNULL): 4501 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4502 4503 # Postgres supports ISNULL and NOTNULL for conditions. 4504 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4505 if self._match(TokenType.NOTNULL): 4506 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4507 this = self.expression(exp.Not, this=this) 4508 4509 if negate: 4510 this = self._negate_range(this) 4511 4512 if self._match(TokenType.IS): 4513 this = self._parse_is(this) 4514 4515 return this 4516 4517 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4518 if not this: 4519 return this 4520 4521 return self.expression(exp.Not, this=this) 4522 4523 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4524 index = self._index - 1 4525 negate = self._match(TokenType.NOT) 4526 4527 if self._match_text_seq("DISTINCT", "FROM"): 4528 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4529 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4530 4531 if self._match(TokenType.JSON): 4532 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4533 4534 if self._match_text_seq("WITH"): 4535 _with = True 4536 elif self._match_text_seq("WITHOUT"): 4537 _with = False 4538 else: 4539 _with = None 4540 4541 unique = self._match(TokenType.UNIQUE) 4542 self._match_text_seq("KEYS") 4543 expression: t.Optional[exp.Expression] = self.expression( 4544 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4545 ) 4546 else: 4547 expression = self._parse_primary() or self._parse_null() 4548 if not expression: 4549 self._retreat(index) 4550 return None 4551 4552 this = self.expression(exp.Is, this=this, expression=expression) 4553 return self.expression(exp.Not, this=this) if negate else this 4554 4555 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4556 unnest = self._parse_unnest(with_alias=False) 4557 if unnest: 4558 this = self.expression(exp.In, this=this, unnest=unnest) 4559 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4560 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4561 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4562 4563 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4564 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4565 else: 4566 this = self.expression(exp.In, this=this, expressions=expressions) 4567 4568 if matched_l_paren: 4569 self._match_r_paren(this) 4570 elif not self._match(TokenType.R_BRACKET, expression=this): 4571 self.raise_error("Expecting ]") 4572 else: 4573 this = self.expression(exp.In, this=this, field=self._parse_column()) 4574 4575 return this 4576 4577 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4578 low = self._parse_bitwise() 4579 self._match(TokenType.AND) 4580 high = self._parse_bitwise() 4581 return self.expression(exp.Between, this=this, low=low, high=high) 4582 4583 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4584 if not self._match(TokenType.ESCAPE): 4585 return this 4586 return self.expression(exp.Escape, this=this, 
expression=self._parse_string()) 4587 4588 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4589 index = self._index 4590 4591 if not self._match(TokenType.INTERVAL) and match_interval: 4592 return None 4593 4594 if self._match(TokenType.STRING, advance=False): 4595 this = self._parse_primary() 4596 else: 4597 this = self._parse_term() 4598 4599 if not this or ( 4600 isinstance(this, exp.Column) 4601 and not this.table 4602 and not this.this.quoted 4603 and this.name.upper() == "IS" 4604 ): 4605 self._retreat(index) 4606 return None 4607 4608 unit = self._parse_function() or ( 4609 not self._match(TokenType.ALIAS, advance=False) 4610 and self._parse_var(any_token=True, upper=True) 4611 ) 4612 4613 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4614 # each INTERVAL expression into this canonical form so it's easy to transpile 4615 if this and this.is_number: 4616 this = exp.Literal.string(this.to_py()) 4617 elif this and this.is_string: 4618 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4619 if len(parts) == 1: 4620 if unit: 4621 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4622 self._retreat(self._index - 1) 4623 4624 this = exp.Literal.string(parts[0][0]) 4625 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4626 4627 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4628 unit = self.expression( 4629 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4630 ) 4631 4632 interval = self.expression(exp.Interval, this=this, unit=unit) 4633 4634 index = self._index 4635 self._match(TokenType.PLUS) 4636 4637 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4638 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4639 return self.expression( 4640 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4641 ) 4642 4643 self._retreat(index) 4644 return interval 4645 4646 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4647 this = self._parse_term() 4648 4649 while True: 4650 if self._match_set(self.BITWISE): 4651 this = self.expression( 4652 self.BITWISE[self._prev.token_type], 4653 this=this, 4654 expression=self._parse_term(), 4655 ) 4656 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4657 this = self.expression( 4658 exp.DPipe, 4659 this=this, 4660 expression=self._parse_term(), 4661 safe=not self.dialect.STRICT_STRING_CONCAT, 4662 ) 4663 elif self._match(TokenType.DQMARK): 4664 this = self.expression( 4665 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4666 ) 4667 elif self._match_pair(TokenType.LT, TokenType.LT): 4668 this = self.expression( 4669 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4670 ) 4671 elif self._match_pair(TokenType.GT, TokenType.GT): 4672 this = self.expression( 4673 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4674 ) 4675 else: 4676 break 4677 4678 return this 4679 4680 def _parse_term(self) -> t.Optional[exp.Expression]: 4681 this = self._parse_factor() 4682 4683 while self._match_set(self.TERM): 4684 klass = self.TERM[self._prev.token_type] 4685 comments = self._prev_comments 4686 expression = self._parse_factor() 4687 4688 this = self.expression(klass, this=this, comments=comments, expression=expression) 4689 4690 if isinstance(this, exp.Collate): 4691 expr = this.expression 4692 4693 # Preserve collations such as pg_catalog."default" 
(Postgres) as columns, otherwise 4694 # fall back to Identifier / Var 4695 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4696 ident = expr.this 4697 if isinstance(ident, exp.Identifier): 4698 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4699 4700 return this 4701 4702 def _parse_factor(self) -> t.Optional[exp.Expression]: 4703 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4704 this = parse_method() 4705 4706 while self._match_set(self.FACTOR): 4707 klass = self.FACTOR[self._prev.token_type] 4708 comments = self._prev_comments 4709 expression = parse_method() 4710 4711 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4712 self._retreat(self._index - 1) 4713 return this 4714 4715 this = self.expression(klass, this=this, comments=comments, expression=expression) 4716 4717 if isinstance(this, exp.Div): 4718 this.args["typed"] = self.dialect.TYPED_DIVISION 4719 this.args["safe"] = self.dialect.SAFE_DIVISION 4720 4721 return this 4722 4723 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4724 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4725 4726 def _parse_unary(self) -> t.Optional[exp.Expression]: 4727 if self._match_set(self.UNARY_PARSERS): 4728 return self.UNARY_PARSERS[self._prev.token_type](self) 4729 return self._parse_at_time_zone(self._parse_type()) 4730 4731 def _parse_type( 4732 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4733 ) -> t.Optional[exp.Expression]: 4734 interval = parse_interval and self._parse_interval() 4735 if interval: 4736 return interval 4737 4738 index = self._index 4739 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4740 4741 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4742 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4743 if isinstance(data_type, exp.Cast): 4744 # This constructor can contain ops directly after it, for instance struct unnesting: 4745 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4746 return self._parse_column_ops(data_type) 4747 4748 if data_type: 4749 index2 = self._index 4750 this = self._parse_primary() 4751 4752 if isinstance(this, exp.Literal): 4753 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4754 if parser: 4755 return parser(self, this, data_type) 4756 4757 return self.expression(exp.Cast, this=this, to=data_type) 4758 4759 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4760 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4761 # 4762 # If the index difference here is greater than 1, that means the parser itself must have 4763 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4764 # 4765 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4766 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4767 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4768 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4769 # 4770 # In these cases, we don't really want to return the converted type, but instead retreat 4771 # and try to parse a Column or Identifier in the section below.
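 #
 # A hedged sketch of the retreat path (an editorial illustration, not part of the
 # original source; it assumes the Snowflake dialect, whose TYPE_CONVERTERS entry
 # expands DECIMAL to DECIMAL(38, 0), and a hypothetical table t; exact behavior
 # may vary by sqlglot version):
 #
 #     >>> import sqlglot
 #     >>> from sqlglot import exp
 #     >>> e = sqlglot.parse_one("SELECT decimal FROM t", read="snowflake")
 #     >>> isinstance(e.selects[0], exp.Column)  # retreated: a column, not a type
 #     True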
4772 if data_type.expressions and index2 - index > 1: 4773 self._retreat(index2) 4774 return self._parse_column_ops(data_type) 4775 4776 self._retreat(index) 4777 4778 if fallback_to_identifier: 4779 return self._parse_id_var() 4780 4781 this = self._parse_column() 4782 return this and self._parse_column_ops(this) 4783 4784 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4785 this = self._parse_type() 4786 if not this: 4787 return None 4788 4789 if isinstance(this, exp.Column) and not this.table: 4790 this = exp.var(this.name.upper()) 4791 4792 return self.expression( 4793 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4794 ) 4795 4796 def _parse_types( 4797 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4798 ) -> t.Optional[exp.Expression]: 4799 index = self._index 4800 4801 this: t.Optional[exp.Expression] = None 4802 prefix = self._match_text_seq("SYSUDTLIB", ".") 4803 4804 if not self._match_set(self.TYPE_TOKENS): 4805 identifier = allow_identifiers and self._parse_id_var( 4806 any_token=False, tokens=(TokenType.VAR,) 4807 ) 4808 if isinstance(identifier, exp.Identifier): 4809 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4810 4811 if len(tokens) != 1: 4812 self.raise_error("Unexpected identifier", self._prev) 4813 4814 if tokens[0].token_type in self.TYPE_TOKENS: 4815 self._prev = tokens[0] 4816 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4817 type_name = identifier.name 4818 4819 while self._match(TokenType.DOT): 4820 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4821 4822 this = exp.DataType.build(type_name, udt=True) 4823 else: 4824 self._retreat(self._index - 1) 4825 return None 4826 else: 4827 return None 4828 4829 type_token = self._prev.token_type 4830 4831 if type_token == TokenType.PSEUDO_TYPE: 4832 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4833 4834 if type_token == TokenType.OBJECT_IDENTIFIER: 4835 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4836 4837 # https://materialize.com/docs/sql/types/map/ 4838 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4839 key_type = self._parse_types( 4840 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4841 ) 4842 if not self._match(TokenType.FARROW): 4843 self._retreat(index) 4844 return None 4845 4846 value_type = self._parse_types( 4847 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4848 ) 4849 if not self._match(TokenType.R_BRACKET): 4850 self._retreat(index) 4851 return None 4852 4853 return exp.DataType( 4854 this=exp.DataType.Type.MAP, 4855 expressions=[key_type, value_type], 4856 nested=True, 4857 prefix=prefix, 4858 ) 4859 4860 nested = type_token in self.NESTED_TYPE_TOKENS 4861 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4862 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4863 expressions = None 4864 maybe_func = False 4865 4866 if self._match(TokenType.L_PAREN): 4867 if is_struct: 4868 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4869 elif nested: 4870 expressions = self._parse_csv( 4871 lambda: self._parse_types( 4872 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4873 ) 4874 ) 4875 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4876 this = expressions[0] 4877 this.set("nullable", True) 4878 self._match_r_paren() 4879 return this 4880 elif type_token in self.ENUM_TYPE_TOKENS: 4881 
expressions = self._parse_csv(self._parse_equality) 4882 elif is_aggregate: 4883 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4884 any_token=False, tokens=(TokenType.VAR,) 4885 ) 4886 if not func_or_ident or not self._match(TokenType.COMMA): 4887 return None 4888 expressions = self._parse_csv( 4889 lambda: self._parse_types( 4890 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4891 ) 4892 ) 4893 expressions.insert(0, func_or_ident) 4894 else: 4895 expressions = self._parse_csv(self._parse_type_size) 4896 4897 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4898 if type_token == TokenType.VECTOR and len(expressions) == 2: 4899 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4900 4901 if not expressions or not self._match(TokenType.R_PAREN): 4902 self._retreat(index) 4903 return None 4904 4905 maybe_func = True 4906 4907 values: t.Optional[t.List[exp.Expression]] = None 4908 4909 if nested and self._match(TokenType.LT): 4910 if is_struct: 4911 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4912 else: 4913 expressions = self._parse_csv( 4914 lambda: self._parse_types( 4915 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4916 ) 4917 ) 4918 4919 if not self._match(TokenType.GT): 4920 self.raise_error("Expecting >") 4921 4922 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4923 values = self._parse_csv(self._parse_assignment) 4924 if not values and is_struct: 4925 values = None 4926 self._retreat(self._index - 1) 4927 else: 4928 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4929 4930 if type_token in self.TIMESTAMPS: 4931 if self._match_text_seq("WITH", "TIME", "ZONE"): 4932 maybe_func = False 4933 tz_type = ( 4934 exp.DataType.Type.TIMETZ 4935 if type_token in self.TIMES 4936 else exp.DataType.Type.TIMESTAMPTZ 4937 ) 4938 this = exp.DataType(this=tz_type, expressions=expressions) 4939 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4940 maybe_func = False 4941 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4942 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4943 maybe_func = False 4944 elif type_token == TokenType.INTERVAL: 4945 unit = self._parse_var(upper=True) 4946 if unit: 4947 if self._match_text_seq("TO"): 4948 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4949 4950 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4951 else: 4952 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4953 4954 if maybe_func and check_func: 4955 index2 = self._index 4956 peek = self._parse_string() 4957 4958 if not peek: 4959 self._retreat(index) 4960 return None 4961 4962 self._retreat(index2) 4963 4964 if not this: 4965 if self._match_text_seq("UNSIGNED"): 4966 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4967 if not unsigned_type_token: 4968 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4969 4970 type_token = unsigned_type_token or type_token 4971 4972 this = exp.DataType( 4973 this=exp.DataType.Type[type_token.value], 4974 expressions=expressions, 4975 nested=nested, 4976 prefix=prefix, 4977 ) 4978 4979 # Empty arrays/structs are allowed 4980 if values is not None: 4981 cls = exp.Struct if is_struct else exp.Array 4982 this = exp.cast(cls(expressions=values), this, copy=False) 4983 4984 elif expressions: 4985 this.set("expressions", 
expressions) 4986 4987 # https://materialize.com/docs/sql/types/list/#type-name 4988 while self._match(TokenType.LIST): 4989 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4990 4991 index = self._index 4992 4993 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4994 matched_array = self._match(TokenType.ARRAY) 4995 4996 while self._curr: 4997 datatype_token = self._prev.token_type 4998 matched_l_bracket = self._match(TokenType.L_BRACKET) 4999 if not matched_l_bracket and not matched_array: 5000 break 5001 5002 matched_array = False 5003 values = self._parse_csv(self._parse_assignment) or None 5004 if ( 5005 values 5006 and not schema 5007 and ( 5008 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5009 ) 5010 ): 5011 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5012 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5013 self._retreat(index) 5014 break 5015 5016 this = exp.DataType( 5017 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5018 ) 5019 self._match(TokenType.R_BRACKET) 5020 5021 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5022 converter = self.TYPE_CONVERTERS.get(this.this) 5023 if converter: 5024 this = converter(t.cast(exp.DataType, this)) 5025 5026 return this 5027 5028 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5029 index = self._index 5030 5031 if ( 5032 self._curr 5033 and self._next 5034 and self._curr.token_type in self.TYPE_TOKENS 5035 and self._next.token_type in self.TYPE_TOKENS 5036 ): 5037 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5038 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5039 this = self._parse_id_var() 5040 else: 5041 this = ( 5042 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5043 or self._parse_id_var() 5044 ) 5045 5046 self._match(TokenType.COLON) 5047 5048 if ( 5049 type_required 5050 and not isinstance(this, exp.DataType) 5051 and not self._match_set(self.TYPE_TOKENS, advance=False) 5052 ): 5053 self._retreat(index) 5054 return self._parse_types() 5055 5056 return self._parse_column_def(this) 5057 5058 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5059 if not self._match_text_seq("AT", "TIME", "ZONE"): 5060 return this 5061 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5062 5063 def _parse_column(self) -> t.Optional[exp.Expression]: 5064 this = self._parse_column_reference() 5065 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5066 5067 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5068 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5069 5070 return column 5071 5072 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5073 this = self._parse_field() 5074 if ( 5075 not this 5076 and self._match(TokenType.VALUES, advance=False) 5077 and self.VALUES_FOLLOWED_BY_PAREN 5078 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5079 ): 5080 this = self._parse_id_var() 5081 5082 if isinstance(this, exp.Identifier): 5083 # We bubble up comments from the Identifier to the Column 5084 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5085 5086 return this 5087 5088 def _parse_colon_as_variant_extract( 5089 self, this: t.Optional[exp.Expression] 5090 ) -> t.Optional[exp.Expression]: 5091 casts = [] 5092 json_path = [] 5093 escape = None 5094 5095 while self._match(TokenType.COLON): 5096 start_index = self._index 5097 5098 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5099 path = self._parse_column_ops( 5100 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5101 ) 5102 5103 # The cast :: operator has a lower precedence than the extraction operator :, so 5104 # we rearrange the AST appropriately to avoid casting the JSON path 5105 while isinstance(path, exp.Cast): 5106 casts.append(path.to) 5107 path = path.this 5108 5109 if casts: 5110 dcolon_offset = next( 5111 i 5112 for i, t in enumerate(self._tokens[start_index:]) 5113 if t.token_type == TokenType.DCOLON 5114 ) 5115 end_token = self._tokens[start_index + dcolon_offset - 1] 5116 else: 5117 end_token = self._prev 5118 5119 if path: 5120 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5121 # it'll roundtrip to a string literal in GET_PATH 5122 if isinstance(path, exp.Identifier) and path.quoted: 5123 escape = True 5124 5125 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5126 5127 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5128 # Databricks transforms it back to the colon/dot notation 5129 if json_path: 5130 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5131 5132 if json_path_expr: 5133 json_path_expr.set("escape", escape) 5134 5135 this = self.expression( 5136 exp.JSONExtract, 5137 this=this, 5138 expression=json_path_expr, 5139 variant_extract=True, 5140 ) 5141 5142 while casts: 5143 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5144 5145 return this 5146 5147 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5148 return self._parse_types() 5149 5150 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5151 this = self._parse_bracket(this) 5152 5153 while self._match_set(self.COLUMN_OPERATORS): 5154 op_token = self._prev.token_type 5155 op = self.COLUMN_OPERATORS.get(op_token) 5156 5157 if op_token == TokenType.DCOLON: 5158 field = self._parse_dcolon() 5159 if not field: 5160 self.raise_error("Expected type") 5161 elif op and self._curr: 5162 field = self._parse_column_reference() or self._parse_bracket() 5163 else: 5164 field = self._parse_field(any_token=True, anonymous_func=True) 5165 5166 if isinstance(field, (exp.Func, exp.Window)) and this: 5167 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5168 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5169 this = exp.replace_tree( 5170 this, 5171 lambda n: ( 5172 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5173 if n.table 5174 else n.this 5175 ) 5176 if isinstance(n, exp.Column) 5177 else n, 5178 ) 5179 5180 if op: 5181 this = op(self, this, field) 5182 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5183 this = self.expression( 5184 exp.Column, 5185 comments=this.comments, 5186 this=field, 5187 table=this.this, 5188 db=this.args.get("table"), 5189 catalog=this.args.get("db"), 5190 ) 5191 elif isinstance(field, exp.Window): 5192 # Move the exp.Dot's to the window's function 5193 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5194 field.set("this", window_func) 5195 this = field 5196 else: 5197 this = self.expression(exp.Dot, this=this, expression=field) 5198 5199 if field and field.comments: 5200 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5201 5202 this = self._parse_bracket(this) 5203 5204 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5205 5206 def _parse_primary(self) -> t.Optional[exp.Expression]: 5207 if self._match_set(self.PRIMARY_PARSERS): 5208 token_type = self._prev.token_type 5209 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5210 5211 if token_type == TokenType.STRING: 5212 expressions = [primary] 5213 while self._match(TokenType.STRING): 5214 expressions.append(exp.Literal.string(self._prev.text)) 5215 5216 if len(expressions) > 1: 5217 return self.expression(exp.Concat, expressions=expressions) 5218 5219 return primary 5220 5221 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5222 return exp.Literal.number(f"0.{self._prev.text}") 5223 5224 if 
self._match(TokenType.L_PAREN): 5225 comments = self._prev_comments 5226 query = self._parse_select() 5227 5228 if query: 5229 expressions = [query] 5230 else: 5231 expressions = self._parse_expressions() 5232 5233 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5234 5235 if not this and self._match(TokenType.R_PAREN, advance=False): 5236 this = self.expression(exp.Tuple) 5237 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5238 this = self._parse_subquery(this=this, parse_alias=False) 5239 elif isinstance(this, exp.Subquery): 5240 this = self._parse_subquery( 5241 this=self._parse_set_operations(this), parse_alias=False 5242 ) 5243 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5244 this = self.expression(exp.Tuple, expressions=expressions) 5245 else: 5246 this = self.expression(exp.Paren, this=this) 5247 5248 if this: 5249 this.add_comments(comments) 5250 5251 self._match_r_paren(expression=this) 5252 return this 5253 5254 return None 5255 5256 def _parse_field( 5257 self, 5258 any_token: bool = False, 5259 tokens: t.Optional[t.Collection[TokenType]] = None, 5260 anonymous_func: bool = False, 5261 ) -> t.Optional[exp.Expression]: 5262 if anonymous_func: 5263 field = ( 5264 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5265 or self._parse_primary() 5266 ) 5267 else: 5268 field = self._parse_primary() or self._parse_function( 5269 anonymous=anonymous_func, any_token=any_token 5270 ) 5271 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5272 5273 def _parse_function( 5274 self, 5275 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5276 anonymous: bool = False, 5277 optional_parens: bool = True, 5278 any_token: bool = False, 5279 ) -> t.Optional[exp.Expression]: 5280 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5281 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5282 fn_syntax = False 5283 if ( 5284 self._match(TokenType.L_BRACE, advance=False) 5285 and self._next 5286 and self._next.text.upper() == "FN" 5287 ): 5288 self._advance(2) 5289 fn_syntax = True 5290 5291 func = self._parse_function_call( 5292 functions=functions, 5293 anonymous=anonymous, 5294 optional_parens=optional_parens, 5295 any_token=any_token, 5296 ) 5297 5298 if fn_syntax: 5299 self._match(TokenType.R_BRACE) 5300 5301 return func 5302 5303 def _parse_function_call( 5304 self, 5305 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5306 anonymous: bool = False, 5307 optional_parens: bool = True, 5308 any_token: bool = False, 5309 ) -> t.Optional[exp.Expression]: 5310 if not self._curr: 5311 return None 5312 5313 comments = self._curr.comments 5314 token_type = self._curr.token_type 5315 this = self._curr.text 5316 upper = this.upper() 5317 5318 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5319 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5320 self._advance() 5321 return self._parse_window(parser(self)) 5322 5323 if not self._next or self._next.token_type != TokenType.L_PAREN: 5324 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5325 self._advance() 5326 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5327 5328 return None 5329 5330 if any_token: 5331 if token_type in self.RESERVED_TOKENS: 5332 return None 5333 elif token_type not in self.FUNC_TOKENS: 5334 return None 5335 5336 self._advance(2) 5337 5338 parser = self.FUNCTION_PARSERS.get(upper) 5339 if parser and not anonymous: 5340 this = parser(self) 
5341 else: 5342 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5343 5344 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5345 this = self.expression( 5346 subquery_predicate, comments=comments, this=self._parse_select() 5347 ) 5348 self._match_r_paren() 5349 return this 5350 5351 if functions is None: 5352 functions = self.FUNCTIONS 5353 5354 function = functions.get(upper) 5355 5356 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5357 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5358 5359 if alias: 5360 args = self._kv_to_prop_eq(args) 5361 5362 if function and not anonymous: 5363 if "dialect" in function.__code__.co_varnames: 5364 func = function(args, dialect=self.dialect) 5365 else: 5366 func = function(args) 5367 5368 func = self.validate_expression(func, args) 5369 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5370 func.meta["name"] = this 5371 5372 this = func 5373 else: 5374 if token_type == TokenType.IDENTIFIER: 5375 this = exp.Identifier(this=this, quoted=True) 5376 this = self.expression(exp.Anonymous, this=this, expressions=args) 5377 5378 if isinstance(this, exp.Expression): 5379 this.add_comments(comments) 5380 5381 self._match_r_paren(this) 5382 return self._parse_window(this) 5383 5384 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5385 return expression 5386 5387 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5388 transformed = [] 5389 5390 for index, e in enumerate(expressions): 5391 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5392 if isinstance(e, exp.Alias): 5393 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5394 5395 if not isinstance(e, exp.PropertyEQ): 5396 e = self.expression( 5397 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5398 ) 5399 5400 if isinstance(e.this, exp.Column): 5401 e.this.replace(e.this.this) 5402 else: 5403 e = self._to_prop_eq(e, index) 5404 5405 transformed.append(e) 5406 5407 return transformed 5408 5409 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5410 return self._parse_statement() 5411 5412 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5413 return self._parse_column_def(self._parse_id_var()) 5414 5415 def _parse_user_defined_function( 5416 self, kind: t.Optional[TokenType] = None 5417 ) -> t.Optional[exp.Expression]: 5418 this = self._parse_id_var() 5419 5420 while self._match(TokenType.DOT): 5421 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5422 5423 if not self._match(TokenType.L_PAREN): 5424 return this 5425 5426 expressions = self._parse_csv(self._parse_function_parameter) 5427 self._match_r_paren() 5428 return self.expression( 5429 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5430 ) 5431 5432 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5433 literal = self._parse_primary() 5434 if literal: 5435 return self.expression(exp.Introducer, this=token.text, expression=literal) 5436 5437 return self.expression(exp.Identifier, this=token.text) 5438 5439 def _parse_session_parameter(self) -> exp.SessionParameter: 5440 kind = None 5441 this = self._parse_id_var() or self._parse_primary() 5442 5443 if this and self._match(TokenType.DOT): 5444 kind = this.name 5445 this = self._parse_var() or self._parse_primary() 5446 5447 return self.expression(exp.SessionParameter, this=this, kind=kind) 
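 # A hedged sketch of the dispatch in _parse_function_call above (an editorial
 # illustration, not part of the original source; MY_UDF is a hypothetical name):
 # names found in FUNCTIONS are built into typed nodes, while unknown names fall
 # back to exp.Anonymous:
 #
 #     >>> import sqlglot
 #     >>> type(sqlglot.parse_one("SELECT COALESCE(a, b)").selects[0]).__name__
 #     'Coalesce'
 #     >>> type(sqlglot.parse_one("SELECT MY_UDF(a, b)").selects[0]).__name__
 #     'Anonymous'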
5448 5449 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5450 return self._parse_id_var() 5451 5452 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5453 index = self._index 5454 5455 if self._match(TokenType.L_PAREN): 5456 expressions = t.cast( 5457 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5458 ) 5459 5460 if not self._match(TokenType.R_PAREN): 5461 self._retreat(index) 5462 else: 5463 expressions = [self._parse_lambda_arg()] 5464 5465 if self._match_set(self.LAMBDAS): 5466 return self.LAMBDAS[self._prev.token_type](self, expressions) 5467 5468 self._retreat(index) 5469 5470 this: t.Optional[exp.Expression] 5471 5472 if self._match(TokenType.DISTINCT): 5473 this = self.expression( 5474 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5475 ) 5476 else: 5477 this = self._parse_select_or_expression(alias=alias) 5478 5479 return self._parse_limit( 5480 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5481 ) 5482 5483 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5484 index = self._index 5485 if not self._match(TokenType.L_PAREN): 5486 return this 5487 5488 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5489 # expr can be of both types 5490 if self._match_set(self.SELECT_START_TOKENS): 5491 self._retreat(index) 5492 return this 5493 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5494 self._match_r_paren() 5495 return self.expression(exp.Schema, this=this, expressions=args) 5496 5497 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5498 return self._parse_column_def(self._parse_field(any_token=True)) 5499 5500 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5501 # column defs are not really columns, they're identifiers 5502 if isinstance(this, exp.Column): 5503 this = this.this 5504 5505 kind = self._parse_types(schema=True) 5506 5507 if self._match_text_seq("FOR", "ORDINALITY"): 5508 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5509 5510 constraints: t.List[exp.Expression] = [] 5511 5512 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5513 ("ALIAS", "MATERIALIZED") 5514 ): 5515 persisted = self._prev.text.upper() == "MATERIALIZED" 5516 constraint_kind = exp.ComputedColumnConstraint( 5517 this=self._parse_assignment(), 5518 persisted=persisted or self._match_text_seq("PERSISTED"), 5519 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5520 ) 5521 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5522 elif ( 5523 kind 5524 and self._match(TokenType.ALIAS, advance=False) 5525 and ( 5526 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5527 or (self._next and self._next.token_type == TokenType.L_PAREN) 5528 ) 5529 ): 5530 self._advance() 5531 constraints.append( 5532 self.expression( 5533 exp.ColumnConstraint, 5534 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5535 ) 5536 ) 5537 5538 while True: 5539 constraint = self._parse_column_constraint() 5540 if not constraint: 5541 break 5542 constraints.append(constraint) 5543 5544 if not kind and not constraints: 5545 return this 5546 5547 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5548 5549 def _parse_auto_increment( 5550 self, 5551 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 
5552 start = None 5553 increment = None 5554 5555 if self._match(TokenType.L_PAREN, advance=False): 5556 args = self._parse_wrapped_csv(self._parse_bitwise) 5557 start = seq_get(args, 0) 5558 increment = seq_get(args, 1) 5559 elif self._match_text_seq("START"): 5560 start = self._parse_bitwise() 5561 self._match_text_seq("INCREMENT") 5562 increment = self._parse_bitwise() 5563 5564 if start and increment: 5565 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5566 5567 return exp.AutoIncrementColumnConstraint() 5568 5569 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5570 if not self._match_text_seq("REFRESH"): 5571 self._retreat(self._index - 1) 5572 return None 5573 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5574 5575 def _parse_compress(self) -> exp.CompressColumnConstraint: 5576 if self._match(TokenType.L_PAREN, advance=False): 5577 return self.expression( 5578 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5579 ) 5580 5581 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5582 5583 def _parse_generated_as_identity( 5584 self, 5585 ) -> ( 5586 exp.GeneratedAsIdentityColumnConstraint 5587 | exp.ComputedColumnConstraint 5588 | exp.GeneratedAsRowColumnConstraint 5589 ): 5590 if self._match_text_seq("BY", "DEFAULT"): 5591 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5592 this = self.expression( 5593 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5594 ) 5595 else: 5596 self._match_text_seq("ALWAYS") 5597 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5598 5599 self._match(TokenType.ALIAS) 5600 5601 if self._match_text_seq("ROW"): 5602 start = self._match_text_seq("START") 5603 if not start: 5604 self._match(TokenType.END) 5605 hidden = self._match_text_seq("HIDDEN") 5606 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5607 5608 identity = self._match_text_seq("IDENTITY") 5609 5610 if self._match(TokenType.L_PAREN): 5611 if self._match(TokenType.START_WITH): 5612 this.set("start", self._parse_bitwise()) 5613 if self._match_text_seq("INCREMENT", "BY"): 5614 this.set("increment", self._parse_bitwise()) 5615 if self._match_text_seq("MINVALUE"): 5616 this.set("minvalue", self._parse_bitwise()) 5617 if self._match_text_seq("MAXVALUE"): 5618 this.set("maxvalue", self._parse_bitwise()) 5619 5620 if self._match_text_seq("CYCLE"): 5621 this.set("cycle", True) 5622 elif self._match_text_seq("NO", "CYCLE"): 5623 this.set("cycle", False) 5624 5625 if not identity: 5626 this.set("expression", self._parse_range()) 5627 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5628 args = self._parse_csv(self._parse_bitwise) 5629 this.set("start", seq_get(args, 0)) 5630 this.set("increment", seq_get(args, 1)) 5631 5632 self._match_r_paren() 5633 5634 return this 5635 5636 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5637 self._match_text_seq("LENGTH") 5638 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5639 5640 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5641 if self._match_text_seq("NULL"): 5642 return self.expression(exp.NotNullColumnConstraint) 5643 if self._match_text_seq("CASESPECIFIC"): 5644 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5645 if self._match_text_seq("FOR", "REPLICATION"): 5646 return 
self.expression(exp.NotForReplicationColumnConstraint) 5647 5648 # Unconsume the `NOT` token 5649 self._retreat(self._index - 1) 5650 return None 5651 5652 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5653 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5654 5655 procedure_option_follows = ( 5656 self._match(TokenType.WITH, advance=False) 5657 and self._next 5658 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5659 ) 5660 5661 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5662 return self.expression( 5663 exp.ColumnConstraint, 5664 this=this, 5665 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5666 ) 5667 5668 return this 5669 5670 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5671 if not self._match(TokenType.CONSTRAINT): 5672 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5673 5674 return self.expression( 5675 exp.Constraint, 5676 this=self._parse_id_var(), 5677 expressions=self._parse_unnamed_constraints(), 5678 ) 5679 5680 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5681 constraints = [] 5682 while True: 5683 constraint = self._parse_unnamed_constraint() or self._parse_function() 5684 if not constraint: 5685 break 5686 constraints.append(constraint) 5687 5688 return constraints 5689 5690 def _parse_unnamed_constraint( 5691 self, constraints: t.Optional[t.Collection[str]] = None 5692 ) -> t.Optional[exp.Expression]: 5693 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5694 constraints or self.CONSTRAINT_PARSERS 5695 ): 5696 return None 5697 5698 constraint = self._prev.text.upper() 5699 if constraint not in self.CONSTRAINT_PARSERS: 5700 self.raise_error(f"No parser found for schema constraint {constraint}.") 5701 5702 return self.CONSTRAINT_PARSERS[constraint](self) 5703 5704 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5705 return self._parse_id_var(any_token=False) 5706 5707 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5708 self._match_text_seq("KEY") 5709 return self.expression( 5710 exp.UniqueColumnConstraint, 5711 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5712 this=self._parse_schema(self._parse_unique_key()), 5713 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5714 on_conflict=self._parse_on_conflict(), 5715 ) 5716 5717 def _parse_key_constraint_options(self) -> t.List[str]: 5718 options = [] 5719 while True: 5720 if not self._curr: 5721 break 5722 5723 if self._match(TokenType.ON): 5724 action = None 5725 on = self._advance_any() and self._prev.text 5726 5727 if self._match_text_seq("NO", "ACTION"): 5728 action = "NO ACTION" 5729 elif self._match_text_seq("CASCADE"): 5730 action = "CASCADE" 5731 elif self._match_text_seq("RESTRICT"): 5732 action = "RESTRICT" 5733 elif self._match_pair(TokenType.SET, TokenType.NULL): 5734 action = "SET NULL" 5735 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5736 action = "SET DEFAULT" 5737 else: 5738 self.raise_error("Invalid key constraint") 5739 5740 options.append(f"ON {on} {action}") 5741 else: 5742 var = self._parse_var_from_options( 5743 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5744 ) 5745 if not var: 5746 break 5747 options.append(var.name) 5748 5749 return options 5750 5751 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5752 if match and not self._match(TokenType.REFERENCES): 5753 return None 5754 5755 expressions = None 
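 # The referenced table is parsed with schema=True below so that a trailing
 # column list, e.g. REFERENCES s (id), is captured as an exp.Schema. A hedged
 # sketch (an editorial illustration, not part of the original source; t and s
 # are hypothetical tables):
 #
 #     >>> import sqlglot
 #     >>> from sqlglot import exp
 #     >>> ct = sqlglot.parse_one("CREATE TABLE t (x INT REFERENCES s (id))")
 #     >>> isinstance(ct.find(exp.Reference).this, exp.Schema)
 #     True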
5756 this = self._parse_table(schema=True) 5757 options = self._parse_key_constraint_options() 5758 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5759 5760 def _parse_foreign_key(self) -> exp.ForeignKey: 5761 expressions = self._parse_wrapped_id_vars() 5762 reference = self._parse_references() 5763 options = {} 5764 5765 while self._match(TokenType.ON): 5766 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5767 self.raise_error("Expected DELETE or UPDATE") 5768 5769 kind = self._prev.text.lower() 5770 5771 if self._match_text_seq("NO", "ACTION"): 5772 action = "NO ACTION" 5773 elif self._match(TokenType.SET): 5774 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5775 action = "SET " + self._prev.text.upper() 5776 else: 5777 self._advance() 5778 action = self._prev.text.upper() 5779 5780 options[kind] = action 5781 5782 return self.expression( 5783 exp.ForeignKey, 5784 expressions=expressions, 5785 reference=reference, 5786 **options, # type: ignore 5787 ) 5788 5789 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5790 return self._parse_field() 5791 5792 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5793 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5794 self._retreat(self._index - 1) 5795 return None 5796 5797 id_vars = self._parse_wrapped_id_vars() 5798 return self.expression( 5799 exp.PeriodForSystemTimeConstraint, 5800 this=seq_get(id_vars, 0), 5801 expression=seq_get(id_vars, 1), 5802 ) 5803 5804 def _parse_primary_key( 5805 self, wrapped_optional: bool = False, in_props: bool = False 5806 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5807 desc = ( 5808 self._match_set((TokenType.ASC, TokenType.DESC)) 5809 and self._prev.token_type == TokenType.DESC 5810 ) 5811 5812 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5813 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5814 5815 expressions = self._parse_wrapped_csv( 5816 self._parse_primary_key_part, optional=wrapped_optional 5817 ) 5818 options = self._parse_key_constraint_options() 5819 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5820 5821 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5822 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5823 5824 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5825 """ 5826 Parses a datetime column in ODBC format. We parse the column into the corresponding 5827 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5828 same as we do for `DATE('yyyy-mm-dd')`.
5829 5830 Reference: 5831 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5832 """ 5833 self._match(TokenType.VAR) 5834 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5835 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5836 if not self._match(TokenType.R_BRACE): 5837 self.raise_error("Expected }") 5838 return expression 5839 5840 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5841 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5842 return this 5843 5844 bracket_kind = self._prev.token_type 5845 if ( 5846 bracket_kind == TokenType.L_BRACE 5847 and self._curr 5848 and self._curr.token_type == TokenType.VAR 5849 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5850 ): 5851 return self._parse_odbc_datetime_literal() 5852 5853 expressions = self._parse_csv( 5854 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5855 ) 5856 5857 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5858 self.raise_error("Expected ]") 5859 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5860 self.raise_error("Expected }") 5861 5862 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5863 if bracket_kind == TokenType.L_BRACE: 5864 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5865 elif not this: 5866 this = build_array_constructor( 5867 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5868 ) 5869 else: 5870 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5871 if constructor_type: 5872 return build_array_constructor( 5873 constructor_type, 5874 args=expressions, 5875 bracket_kind=bracket_kind, 5876 dialect=self.dialect, 5877 ) 5878 5879 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5880 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5881 5882 self._add_comments(this) 5883 return self._parse_bracket(this) 5884 5885 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5886 if self._match(TokenType.COLON): 5887 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5888 return this 5889 5890 def _parse_case(self) -> t.Optional[exp.Expression]: 5891 ifs = [] 5892 default = None 5893 5894 comments = self._prev_comments 5895 expression = self._parse_assignment() 5896 5897 while self._match(TokenType.WHEN): 5898 this = self._parse_assignment() 5899 self._match(TokenType.THEN) 5900 then = self._parse_assignment() 5901 ifs.append(self.expression(exp.If, this=this, true=then)) 5902 5903 if self._match(TokenType.ELSE): 5904 default = self._parse_assignment() 5905 5906 if not self._match(TokenType.END): 5907 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5908 default = exp.column("interval") 5909 else: 5910 self.raise_error("Expected END after CASE", self._prev) 5911 5912 return self.expression( 5913 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5914 ) 5915 5916 def _parse_if(self) -> t.Optional[exp.Expression]: 5917 if self._match(TokenType.L_PAREN): 5918 args = self._parse_csv(self._parse_assignment) 5919 this = self.validate_expression(exp.If.from_arg_list(args), args) 5920 self._match_r_paren() 5921 else: 5922 index = self._index - 1 5923 5924 if self.NO_PAREN_IF_COMMANDS and index == 0: 5925 
return self._parse_as_command(self._prev) 5926 5927 condition = self._parse_assignment() 5928 5929 if not condition: 5930 self._retreat(index) 5931 return None 5932 5933 self._match(TokenType.THEN) 5934 true = self._parse_assignment() 5935 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5936 self._match(TokenType.END) 5937 this = self.expression(exp.If, this=condition, true=true, false=false) 5938 5939 return this 5940 5941 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5942 if not self._match_text_seq("VALUE", "FOR"): 5943 self._retreat(self._index - 1) 5944 return None 5945 5946 return self.expression( 5947 exp.NextValueFor, 5948 this=self._parse_column(), 5949 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5950 ) 5951 5952 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5953 this = self._parse_function() or self._parse_var_or_string(upper=True) 5954 5955 if self._match(TokenType.FROM): 5956 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5957 5958 if not self._match(TokenType.COMMA): 5959 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5960 5961 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5962 5963 def _parse_gap_fill(self) -> exp.GapFill: 5964 self._match(TokenType.TABLE) 5965 this = self._parse_table() 5966 5967 self._match(TokenType.COMMA) 5968 args = [this, *self._parse_csv(self._parse_lambda)] 5969 5970 gap_fill = exp.GapFill.from_arg_list(args) 5971 return self.validate_expression(gap_fill, args) 5972 5973 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5974 this = self._parse_assignment() 5975 5976 if not self._match(TokenType.ALIAS): 5977 if self._match(TokenType.COMMA): 5978 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5979 5980 self.raise_error("Expected AS after CAST") 5981 5982 fmt = None 5983 to = self._parse_types() 5984 5985 if self._match(TokenType.FORMAT): 5986 fmt_string = self._parse_string() 5987 fmt = self._parse_at_time_zone(fmt_string) 5988 5989 if not to: 5990 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5991 if to.this in exp.DataType.TEMPORAL_TYPES: 5992 this = self.expression( 5993 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5994 this=this, 5995 format=exp.Literal.string( 5996 format_time( 5997 fmt_string.this if fmt_string else "", 5998 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5999 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6000 ) 6001 ), 6002 safe=safe, 6003 ) 6004 6005 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6006 this.set("zone", fmt.args["zone"]) 6007 return this 6008 elif not to: 6009 self.raise_error("Expected TYPE after CAST") 6010 elif isinstance(to, exp.Identifier): 6011 to = exp.DataType.build(to.name, udt=True) 6012 elif to.this == exp.DataType.Type.CHAR: 6013 if self._match(TokenType.CHARACTER_SET): 6014 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6015 6016 return self.expression( 6017 exp.Cast if strict else exp.TryCast, 6018 this=this, 6019 to=to, 6020 format=fmt, 6021 safe=safe, 6022 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6023 ) 6024 6025 def _parse_string_agg(self) -> exp.GroupConcat: 6026 if self._match(TokenType.DISTINCT): 6027 args: t.List[t.Optional[exp.Expression]] = [ 6028 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
6029 ] 6030 if self._match(TokenType.COMMA): 6031 args.extend(self._parse_csv(self._parse_assignment)) 6032 else: 6033 args = self._parse_csv(self._parse_assignment) # type: ignore 6034 6035 if self._match_text_seq("ON", "OVERFLOW"): 6036 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6037 if self._match_text_seq("ERROR"): 6038 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6039 else: 6040 self._match_text_seq("TRUNCATE") 6041 on_overflow = self.expression( 6042 exp.OverflowTruncateBehavior, 6043 this=self._parse_string(), 6044 with_count=( 6045 self._match_text_seq("WITH", "COUNT") 6046 or not self._match_text_seq("WITHOUT", "COUNT") 6047 ), 6048 ) 6049 else: 6050 on_overflow = None 6051 6052 index = self._index 6053 if not self._match(TokenType.R_PAREN) and args: 6054 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6055 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6056 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6057 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6058 6059 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6060 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6061 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6062 if not self._match_text_seq("WITHIN", "GROUP"): 6063 self._retreat(index) 6064 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6065 6066 # The corresponding match_r_paren will be called in parse_function (caller) 6067 self._match_l_paren() 6068 6069 return self.expression( 6070 exp.GroupConcat, 6071 this=self._parse_order(this=seq_get(args, 0)), 6072 separator=seq_get(args, 1), 6073 on_overflow=on_overflow, 6074 ) 6075 6076 def _parse_convert( 6077 self, strict: bool, safe: t.Optional[bool] = None 6078 ) -> t.Optional[exp.Expression]: 6079 this = self._parse_bitwise() 6080 6081 if self._match(TokenType.USING): 6082 to: t.Optional[exp.Expression] = self.expression( 6083 exp.CharacterSet, this=self._parse_var() 6084 ) 6085 elif self._match(TokenType.COMMA): 6086 to = self._parse_types() 6087 else: 6088 to = None 6089 6090 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6091 6092 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6093 """ 6094 There are generally two variants of the DECODE function: 6095 6096 - DECODE(bin, charset) 6097 - DECODE(expression, search, result [, search, result] ... [, default]) 6098 6099 The second variant will always be parsed into a CASE expression. Note that NULL 6100 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6101 instead of relying on pattern matching. 
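 For example, DECODE(x, 1, 'one', NULL, 'empty', 'other') is parsed as the
 equivalent of CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'empty' ELSE 'other' END.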
6102 """ 6103 args = self._parse_csv(self._parse_assignment) 6104 6105 if len(args) < 3: 6106 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6107 6108 expression, *expressions = args 6109 if not expression: 6110 return None 6111 6112 ifs = [] 6113 for search, result in zip(expressions[::2], expressions[1::2]): 6114 if not search or not result: 6115 return None 6116 6117 if isinstance(search, exp.Literal): 6118 ifs.append( 6119 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6120 ) 6121 elif isinstance(search, exp.Null): 6122 ifs.append( 6123 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6124 ) 6125 else: 6126 cond = exp.or_( 6127 exp.EQ(this=expression.copy(), expression=search), 6128 exp.and_( 6129 exp.Is(this=expression.copy(), expression=exp.Null()), 6130 exp.Is(this=search.copy(), expression=exp.Null()), 6131 copy=False, 6132 ), 6133 copy=False, 6134 ) 6135 ifs.append(exp.If(this=cond, true=result)) 6136 6137 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6138 6139 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6140 self._match_text_seq("KEY") 6141 key = self._parse_column() 6142 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6143 self._match_text_seq("VALUE") 6144 value = self._parse_bitwise() 6145 6146 if not key and not value: 6147 return None 6148 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6149 6150 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6151 if not this or not self._match_text_seq("FORMAT", "JSON"): 6152 return this 6153 6154 return self.expression(exp.FormatJson, this=this) 6155 6156 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6157 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6158 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6159 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6160 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6161 else: 6162 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6163 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6164 6165 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6166 6167 if not empty and not error and not null: 6168 return None 6169 6170 return self.expression( 6171 exp.OnCondition, 6172 empty=empty, 6173 error=error, 6174 null=null, 6175 ) 6176 6177 def _parse_on_handling( 6178 self, on: str, *values: str 6179 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6180 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6181 for value in values: 6182 if self._match_text_seq(value, "ON", on): 6183 return f"{value} ON {on}" 6184 6185 index = self._index 6186 if self._match(TokenType.DEFAULT): 6187 default_value = self._parse_bitwise() 6188 if self._match_text_seq("ON", on): 6189 return default_value 6190 6191 self._retreat(index) 6192 6193 return None 6194 6195 @t.overload 6196 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6197 6198 @t.overload 6199 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6200 6201 def _parse_json_object(self, agg=False): 6202 star = self._parse_star() 6203 expressions = ( 6204 [star] 6205 if star 6206 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6207 ) 6208 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6209 6210 unique_keys = None 6211 if self._match_text_seq("WITH", "UNIQUE"): 6212 unique_keys = True 6213 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6214 unique_keys = False 6215 6216 self._match_text_seq("KEYS") 6217 6218 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6219 self._parse_type() 6220 ) 6221 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6222 6223 return self.expression( 6224 exp.JSONObjectAgg if agg else exp.JSONObject, 6225 expressions=expressions, 6226 null_handling=null_handling, 6227 unique_keys=unique_keys, 6228 return_type=return_type, 6229 encoding=encoding, 6230 ) 6231 6232 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6233 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6234 if not self._match_text_seq("NESTED"): 6235 this = self._parse_id_var() 6236 kind = self._parse_types(allow_identifiers=False) 6237 nested = None 6238 else: 6239 this = None 6240 kind = None 6241 nested = True 6242 6243 path = self._match_text_seq("PATH") and self._parse_string() 6244 nested_schema = nested and self._parse_json_schema() 6245 6246 return self.expression( 6247 exp.JSONColumnDef, 6248 this=this, 6249 kind=kind, 6250 path=path, 6251 nested_schema=nested_schema, 6252 ) 6253 6254 def _parse_json_schema(self) -> exp.JSONSchema: 6255 self._match_text_seq("COLUMNS") 6256 return self.expression( 6257 exp.JSONSchema, 6258 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6259 ) 6260 6261 def _parse_json_table(self) -> exp.JSONTable: 6262 this = self._parse_format_json(self._parse_bitwise()) 6263 path = self._match(TokenType.COMMA) and self._parse_string() 6264 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6265 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6266 schema = self._parse_json_schema() 6267 6268 return exp.JSONTable( 6269 this=this, 6270 schema=schema, 6271 path=path, 6272 error_handling=error_handling, 6273 empty_handling=empty_handling, 6274 ) 6275 6276 def _parse_match_against(self) -> exp.MatchAgainst: 6277 expressions = self._parse_csv(self._parse_column) 6278 6279 self._match_text_seq(")", "AGAINST", "(") 6280 6281 this = self._parse_string() 6282 6283 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6284 modifier = "IN NATURAL LANGUAGE MODE" 6285 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6286 modifier = f"{modifier} WITH QUERY EXPANSION" 6287 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6288 modifier = "IN BOOLEAN MODE" 6289 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6290 modifier = "WITH QUERY EXPANSION" 6291 else: 6292 modifier = None 6293 6294 return self.expression( 6295 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6296 ) 6297 6298 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6299 def _parse_open_json(self) -> exp.OpenJSON: 6300 this = self._parse_bitwise() 6301 path = self._match(TokenType.COMMA) and self._parse_string() 6302 6303 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6304 this = self._parse_field(any_token=True) 6305 kind = self._parse_types() 6306 path = 
self._parse_string() 6307 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6308 6309 return self.expression( 6310 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6311 ) 6312 6313 expressions = None 6314 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6315 self._match_l_paren() 6316 expressions = self._parse_csv(_parse_open_json_column_def) 6317 6318 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6319 6320 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6321 args = self._parse_csv(self._parse_bitwise) 6322 6323 if self._match(TokenType.IN): 6324 return self.expression( 6325 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6326 ) 6327 6328 if haystack_first: 6329 haystack = seq_get(args, 0) 6330 needle = seq_get(args, 1) 6331 else: 6332 needle = seq_get(args, 0) 6333 haystack = seq_get(args, 1) 6334 6335 return self.expression( 6336 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6337 ) 6338 6339 def _parse_predict(self) -> exp.Predict: 6340 self._match_text_seq("MODEL") 6341 this = self._parse_table() 6342 6343 self._match(TokenType.COMMA) 6344 self._match_text_seq("TABLE") 6345 6346 return self.expression( 6347 exp.Predict, 6348 this=this, 6349 expression=self._parse_table(), 6350 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6351 ) 6352 6353 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6354 args = self._parse_csv(self._parse_table) 6355 return exp.JoinHint(this=func_name.upper(), expressions=args) 6356 6357 def _parse_substring(self) -> exp.Substring: 6358 # Postgres supports the form: substring(string [from int] [for int]) 6359 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6360 6361 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6362 6363 if self._match(TokenType.FROM): 6364 args.append(self._parse_bitwise()) 6365 if self._match(TokenType.FOR): 6366 if len(args) == 1: 6367 args.append(exp.Literal.number(1)) 6368 args.append(self._parse_bitwise()) 6369 6370 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6371 6372 def _parse_trim(self) -> exp.Trim: 6373 # https://www.w3resource.com/sql/character-functions/trim.php 6374 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6375 6376 position = None 6377 collation = None 6378 expression = None 6379 6380 if self._match_texts(self.TRIM_TYPES): 6381 position = self._prev.text.upper() 6382 6383 this = self._parse_bitwise() 6384 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6385 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6386 expression = self._parse_bitwise() 6387 6388 if invert_order: 6389 this, expression = expression, this 6390 6391 if self._match(TokenType.COLLATE): 6392 collation = self._parse_bitwise() 6393 6394 return self.expression( 6395 exp.Trim, this=this, position=position, expression=expression, collation=collation 6396 ) 6397 6398 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6399 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6400 6401 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6402 return self._parse_window(self._parse_id_var(), alias=True) 6403 6404 def _parse_respect_or_ignore_nulls( 6405 self, this: t.Optional[exp.Expression] 6406 ) -> t.Optional[exp.Expression]: 6407 if self._match_text_seq("IGNORE", "NULLS"): 
6408 return self.expression(exp.IgnoreNulls, this=this) 6409 if self._match_text_seq("RESPECT", "NULLS"): 6410 return self.expression(exp.RespectNulls, this=this) 6411 return this 6412 6413 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6414 if self._match(TokenType.HAVING): 6415 self._match_texts(("MAX", "MIN")) 6416 max = self._prev.text.upper() != "MIN" 6417 return self.expression( 6418 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6419 ) 6420 6421 return this 6422 6423 def _parse_window( 6424 self, this: t.Optional[exp.Expression], alias: bool = False 6425 ) -> t.Optional[exp.Expression]: 6426 func = this 6427 comments = func.comments if isinstance(func, exp.Expression) else None 6428 6429 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6430 self._match(TokenType.WHERE) 6431 this = self.expression( 6432 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6433 ) 6434 self._match_r_paren() 6435 6436 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6437 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6438 if self._match_text_seq("WITHIN", "GROUP"): 6439 order = self._parse_wrapped(self._parse_order) 6440 this = self.expression(exp.WithinGroup, this=this, expression=order) 6441 6442 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6443 # Some dialects choose to implement and some do not. 6444 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6445 6446 # There is some code above in _parse_lambda that handles 6447 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6448 6449 # The below changes handle 6450 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6451 6452 # Oracle allows both formats 6453 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6454 # and Snowflake chose to do the same for familiarity 6455 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6456 if isinstance(this, exp.AggFunc): 6457 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6458 6459 if ignore_respect and ignore_respect is not this: 6460 ignore_respect.replace(ignore_respect.this) 6461 this = self.expression(ignore_respect.__class__, this=this) 6462 6463 this = self._parse_respect_or_ignore_nulls(this) 6464 6465 # bigquery select from window x AS (partition by ...) 
6466 if alias: 6467 over = None 6468 self._match(TokenType.ALIAS) 6469 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6470 return this 6471 else: 6472 over = self._prev.text.upper() 6473 6474 if comments and isinstance(func, exp.Expression): 6475 func.pop_comments() 6476 6477 if not self._match(TokenType.L_PAREN): 6478 return self.expression( 6479 exp.Window, 6480 comments=comments, 6481 this=this, 6482 alias=self._parse_id_var(False), 6483 over=over, 6484 ) 6485 6486 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6487 6488 first = self._match(TokenType.FIRST) 6489 if self._match_text_seq("LAST"): 6490 first = False 6491 6492 partition, order = self._parse_partition_and_order() 6493 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6494 6495 if kind: 6496 self._match(TokenType.BETWEEN) 6497 start = self._parse_window_spec() 6498 self._match(TokenType.AND) 6499 end = self._parse_window_spec() 6500 6501 spec = self.expression( 6502 exp.WindowSpec, 6503 kind=kind, 6504 start=start["value"], 6505 start_side=start["side"], 6506 end=end["value"], 6507 end_side=end["side"], 6508 ) 6509 else: 6510 spec = None 6511 6512 self._match_r_paren() 6513 6514 window = self.expression( 6515 exp.Window, 6516 comments=comments, 6517 this=this, 6518 partition_by=partition, 6519 order=order, 6520 spec=spec, 6521 alias=window_alias, 6522 over=over, 6523 first=first, 6524 ) 6525 6526 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6527 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6528 return self._parse_window(window, alias=alias) 6529 6530 return window 6531 6532 def _parse_partition_and_order( 6533 self, 6534 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6535 return self._parse_partition_by(), self._parse_order() 6536 6537 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6538 self._match(TokenType.BETWEEN) 6539 6540 return { 6541 "value": ( 6542 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6543 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6544 or self._parse_bitwise() 6545 ), 6546 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6547 } 6548 6549 def _parse_alias( 6550 self, this: t.Optional[exp.Expression], explicit: bool = False 6551 ) -> t.Optional[exp.Expression]: 6552 any_token = self._match(TokenType.ALIAS) 6553 comments = self._prev_comments or [] 6554 6555 if explicit and not any_token: 6556 return this 6557 6558 if self._match(TokenType.L_PAREN): 6559 aliases = self.expression( 6560 exp.Aliases, 6561 comments=comments, 6562 this=this, 6563 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6564 ) 6565 self._match_r_paren(aliases) 6566 return aliases 6567 6568 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6569 self.STRING_ALIASES and self._parse_string_as_identifier() 6570 ) 6571 6572 if alias: 6573 comments.extend(alias.pop_comments()) 6574 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6575 column = this.this 6576 6577 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6578 if not this.comments and column and column.comments: 6579 this.comments = column.pop_comments() 6580 6581 return this 6582 6583 def _parse_id_var( 6584 self, 6585 any_token: bool = True, 6586 tokens: t.Optional[t.Collection[TokenType]] = None, 6587 ) -> t.Optional[exp.Expression]: 6588 expression = self._parse_identifier() 6589 if 
not expression and ( 6590 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6591 ): 6592 quoted = self._prev.token_type == TokenType.STRING 6593 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6594 6595 return expression 6596 6597 def _parse_string(self) -> t.Optional[exp.Expression]: 6598 if self._match_set(self.STRING_PARSERS): 6599 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6600 return self._parse_placeholder() 6601 6602 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6603 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6604 6605 def _parse_number(self) -> t.Optional[exp.Expression]: 6606 if self._match_set(self.NUMERIC_PARSERS): 6607 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6608 return self._parse_placeholder() 6609 6610 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6611 if self._match(TokenType.IDENTIFIER): 6612 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6613 return self._parse_placeholder() 6614 6615 def _parse_var( 6616 self, 6617 any_token: bool = False, 6618 tokens: t.Optional[t.Collection[TokenType]] = None, 6619 upper: bool = False, 6620 ) -> t.Optional[exp.Expression]: 6621 if ( 6622 (any_token and self._advance_any()) 6623 or self._match(TokenType.VAR) 6624 or (self._match_set(tokens) if tokens else False) 6625 ): 6626 return self.expression( 6627 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6628 ) 6629 return self._parse_placeholder() 6630 6631 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6632 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6633 self._advance() 6634 return self._prev 6635 return None 6636 6637 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6638 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6639 6640 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6641 return self._parse_primary() or self._parse_var(any_token=True) 6642 6643 def _parse_null(self) -> t.Optional[exp.Expression]: 6644 if self._match_set(self.NULL_TOKENS): 6645 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6646 return self._parse_placeholder() 6647 6648 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6649 if self._match(TokenType.TRUE): 6650 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6651 if self._match(TokenType.FALSE): 6652 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6653 return self._parse_placeholder() 6654 6655 def _parse_star(self) -> t.Optional[exp.Expression]: 6656 if self._match(TokenType.STAR): 6657 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6658 return self._parse_placeholder() 6659 6660 def _parse_parameter(self) -> exp.Parameter: 6661 this = self._parse_identifier() or self._parse_primary_or_var() 6662 return self.expression(exp.Parameter, this=this) 6663 6664 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6665 if self._match_set(self.PLACEHOLDER_PARSERS): 6666 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6667 if placeholder: 6668 return placeholder 6669 self._advance(-1) 6670 return None 6671 6672 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6673 if not self._match_texts(keywords): 6674 return None 6675 if self._match(TokenType.L_PAREN, 
advance=False): 6676 return self._parse_wrapped_csv(self._parse_expression) 6677 6678 expression = self._parse_expression() 6679 return [expression] if expression else None 6680 6681 def _parse_csv( 6682 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6683 ) -> t.List[exp.Expression]: 6684 parse_result = parse_method() 6685 items = [parse_result] if parse_result is not None else [] 6686 6687 while self._match(sep): 6688 self._add_comments(parse_result) 6689 parse_result = parse_method() 6690 if parse_result is not None: 6691 items.append(parse_result) 6692 6693 return items 6694 6695 def _parse_tokens( 6696 self, parse_method: t.Callable, expressions: t.Dict 6697 ) -> t.Optional[exp.Expression]: 6698 this = parse_method() 6699 6700 while self._match_set(expressions): 6701 this = self.expression( 6702 expressions[self._prev.token_type], 6703 this=this, 6704 comments=self._prev_comments, 6705 expression=parse_method(), 6706 ) 6707 6708 return this 6709 6710 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6711 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6712 6713 def _parse_wrapped_csv( 6714 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6715 ) -> t.List[exp.Expression]: 6716 return self._parse_wrapped( 6717 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6718 ) 6719 6720 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6721 wrapped = self._match(TokenType.L_PAREN) 6722 if not wrapped and not optional: 6723 self.raise_error("Expecting (") 6724 parse_result = parse_method() 6725 if wrapped: 6726 self._match_r_paren() 6727 return parse_result 6728 6729 def _parse_expressions(self) -> t.List[exp.Expression]: 6730 return self._parse_csv(self._parse_expression) 6731 6732 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6733 return self._parse_select() or self._parse_set_operations( 6734 self._parse_expression() if alias else self._parse_assignment() 6735 ) 6736 6737 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6738 return self._parse_query_modifiers( 6739 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6740 ) 6741 6742 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6743 this = None 6744 if self._match_texts(self.TRANSACTION_KIND): 6745 this = self._prev.text 6746 6747 self._match_texts(("TRANSACTION", "WORK")) 6748 6749 modes = [] 6750 while True: 6751 mode = [] 6752 while self._match(TokenType.VAR): 6753 mode.append(self._prev.text) 6754 6755 if mode: 6756 modes.append(" ".join(mode)) 6757 if not self._match(TokenType.COMMA): 6758 break 6759 6760 return self.expression(exp.Transaction, this=this, modes=modes) 6761 6762 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6763 chain = None 6764 savepoint = None 6765 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6766 6767 self._match_texts(("TRANSACTION", "WORK")) 6768 6769 if self._match_text_seq("TO"): 6770 self._match_text_seq("SAVEPOINT") 6771 savepoint = self._parse_id_var() 6772 6773 if self._match(TokenType.AND): 6774 chain = not self._match_text_seq("NO") 6775 self._match_text_seq("CHAIN") 6776 6777 if is_rollback: 6778 return self.expression(exp.Rollback, savepoint=savepoint) 6779 6780 return self.expression(exp.Commit, chain=chain) 6781 6782 def _parse_refresh(self) -> exp.Refresh: 6783 self._match(TokenType.TABLE) 6784 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6785 6786 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6787 if not self._match_text_seq("ADD"): 6788 return None 6789 6790 self._match(TokenType.COLUMN) 6791 exists_column = self._parse_exists(not_=True) 6792 expression = self._parse_field_def() 6793 6794 if expression: 6795 expression.set("exists", exists_column) 6796 6797 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6798 if self._match_texts(("FIRST", "AFTER")): 6799 position = self._prev.text 6800 column_position = self.expression( 6801 exp.ColumnPosition, this=self._parse_column(), position=position 6802 ) 6803 expression.set("position", column_position) 6804 6805 return expression 6806 6807 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6808 drop = self._match(TokenType.DROP) and self._parse_drop() 6809 if drop and not isinstance(drop, exp.Command): 6810 drop.set("kind", drop.args.get("kind", "COLUMN")) 6811 return drop 6812 6813 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6814 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6815 return self.expression( 6816 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6817 ) 6818 6819 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6820 index = self._index - 1 6821 6822 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6823 return self._parse_csv( 6824 lambda: self.expression( 6825 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6826 ) 6827 ) 6828 6829 self._retreat(index) 6830 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6831 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6832 6833 if self._match_text_seq("ADD", "COLUMNS"): 6834 schema = self._parse_schema() 6835 if schema: 6836 return [schema] 6837 return [] 6838 6839 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6840 6841 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6842 if self._match_texts(self.ALTER_ALTER_PARSERS): 6843 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6844 6845 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6846 # keyword after ALTER we default to parsing this statement 6847 self._match(TokenType.COLUMN) 6848 column = self._parse_field(any_token=True) 6849 6850 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6851 return self.expression(exp.AlterColumn, this=column, drop=True) 6852 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6853 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6854 if self._match(TokenType.COMMENT): 6855 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6856 if self._match_text_seq("DROP", "NOT", "NULL"): 6857 return self.expression( 6858 exp.AlterColumn, 6859 this=column, 6860 drop=True, 6861 allow_null=True, 6862 ) 6863 if self._match_text_seq("SET", "NOT", "NULL"): 6864 return self.expression( 6865 exp.AlterColumn, 6866 this=column, 6867 allow_null=False, 6868 ) 6869 self._match_text_seq("SET", "DATA") 6870 self._match_text_seq("TYPE") 6871 return self.expression( 6872 exp.AlterColumn, 6873 this=column, 6874 dtype=self._parse_types(), 6875 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6876 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6877 ) 6878 6879 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6880 if self._match_texts(("ALL", "EVEN", "AUTO")): 6881 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6882 6883 self._match_text_seq("KEY", "DISTKEY") 6884 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6885 6886 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6887 if compound: 6888 self._match_text_seq("SORTKEY") 6889 6890 if self._match(TokenType.L_PAREN, advance=False): 6891 return self.expression( 6892 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6893 ) 6894 6895 self._match_texts(("AUTO", "NONE")) 6896 return self.expression( 6897 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6898 ) 6899 6900 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6901 index = self._index - 1 6902 6903 partition_exists = self._parse_exists() 6904 if self._match(TokenType.PARTITION, advance=False): 6905 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6906 6907 self._retreat(index) 6908 return self._parse_csv(self._parse_drop_column) 6909 6910 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6911 if self._match(TokenType.COLUMN): 6912 exists = self._parse_exists() 6913 old_column = self._parse_column() 6914 to = self._match_text_seq("TO") 6915 new_column = self._parse_column() 6916 6917 if old_column is None or to is None or new_column is None: 6918 return None 6919 6920 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6921 6922 self._match_text_seq("TO") 6923 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6924 6925 def _parse_alter_table_set(self) -> exp.AlterSet: 6926 alter_set = self.expression(exp.AlterSet) 6927 6928 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6929 "TABLE", "PROPERTIES" 6930 ): 6931 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6932 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6933 alter_set.set("expressions", [self._parse_assignment()]) 6934 elif self._match_texts(("LOGGED", "UNLOGGED")): 6935 alter_set.set("option", exp.var(self._prev.text.upper())) 6936 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6937 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6938 elif self._match_text_seq("LOCATION"): 6939 alter_set.set("location", self._parse_field()) 6940 elif self._match_text_seq("ACCESS", "METHOD"): 6941 alter_set.set("access_method", self._parse_field()) 6942 elif self._match_text_seq("TABLESPACE"): 6943 alter_set.set("tablespace", self._parse_field()) 6944 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6945 alter_set.set("file_format", [self._parse_field()]) 6946 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6947 alter_set.set("file_format", self._parse_wrapped_options()) 6948 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6949 alter_set.set("copy_options", self._parse_wrapped_options()) 6950 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6951 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6952 else: 6953 if self._match_text_seq("SERDE"): 6954 alter_set.set("serde", self._parse_field()) 6955 6956 alter_set.set("expressions", [self._parse_properties()]) 6957 6958 return 
alter_set 6959 6960 def _parse_alter(self) -> exp.Alter | exp.Command: 6961 start = self._prev 6962 6963 alter_token = self._match_set(self.ALTERABLES) and self._prev 6964 if not alter_token: 6965 return self._parse_as_command(start) 6966 6967 exists = self._parse_exists() 6968 only = self._match_text_seq("ONLY") 6969 this = self._parse_table(schema=True) 6970 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6971 6972 if self._next: 6973 self._advance() 6974 6975 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6976 if parser: 6977 actions = ensure_list(parser(self)) 6978 not_valid = self._match_text_seq("NOT", "VALID") 6979 options = self._parse_csv(self._parse_property) 6980 6981 if not self._curr and actions: 6982 return self.expression( 6983 exp.Alter, 6984 this=this, 6985 kind=alter_token.text.upper(), 6986 exists=exists, 6987 actions=actions, 6988 only=only, 6989 options=options, 6990 cluster=cluster, 6991 not_valid=not_valid, 6992 ) 6993 6994 return self._parse_as_command(start) 6995 6996 def _parse_merge(self) -> exp.Merge: 6997 self._match(TokenType.INTO) 6998 target = self._parse_table() 6999 7000 if target and self._match(TokenType.ALIAS, advance=False): 7001 target.set("alias", self._parse_table_alias()) 7002 7003 self._match(TokenType.USING) 7004 using = self._parse_table() 7005 7006 self._match(TokenType.ON) 7007 on = self._parse_assignment() 7008 7009 return self.expression( 7010 exp.Merge, 7011 this=target, 7012 using=using, 7013 on=on, 7014 whens=self._parse_when_matched(), 7015 returning=self._parse_returning(), 7016 ) 7017 7018 def _parse_when_matched(self) -> exp.Whens: 7019 whens = [] 7020 7021 while self._match(TokenType.WHEN): 7022 matched = not self._match(TokenType.NOT) 7023 self._match_text_seq("MATCHED") 7024 source = ( 7025 False 7026 if self._match_text_seq("BY", "TARGET") 7027 else self._match_text_seq("BY", "SOURCE") 7028 ) 7029 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7030 7031 self._match(TokenType.THEN) 7032 7033 if self._match(TokenType.INSERT): 7034 this = self._parse_star() 7035 if this: 7036 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7037 else: 7038 then = self.expression( 7039 exp.Insert, 7040 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7041 expression=self._match_text_seq("VALUES") and self._parse_value(), 7042 ) 7043 elif self._match(TokenType.UPDATE): 7044 expressions = self._parse_star() 7045 if expressions: 7046 then = self.expression(exp.Update, expressions=expressions) 7047 else: 7048 then = self.expression( 7049 exp.Update, 7050 expressions=self._match(TokenType.SET) 7051 and self._parse_csv(self._parse_equality), 7052 ) 7053 elif self._match(TokenType.DELETE): 7054 then = self.expression(exp.Var, this=self._prev.text) 7055 else: 7056 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7057 7058 whens.append( 7059 self.expression( 7060 exp.When, 7061 matched=matched, 7062 source=source, 7063 condition=condition, 7064 then=then, 7065 ) 7066 ) 7067 return self.expression(exp.Whens, expressions=whens) 7068 7069 def _parse_show(self) -> t.Optional[exp.Expression]: 7070 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7071 if parser: 7072 return parser(self) 7073 return self._parse_as_command(self._prev) 7074 7075 def _parse_set_item_assignment( 7076 self, kind: t.Optional[str] = None 7077 ) -> t.Optional[exp.Expression]: 7078 index = self._index 7079 7080 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7081 return self._parse_set_transaction(global_=kind == "GLOBAL") 7082 7083 left = self._parse_primary() or self._parse_column() 7084 assignment_delimiter = self._match_texts(("=", "TO")) 7085 7086 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7087 self._retreat(index) 7088 return None 7089 7090 right = self._parse_statement() or self._parse_id_var() 7091 if isinstance(right, (exp.Column, exp.Identifier)): 7092 right = exp.var(right.name) 7093 7094 this = self.expression(exp.EQ, this=left, expression=right) 7095 return self.expression(exp.SetItem, this=this, kind=kind) 7096 7097 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7098 self._match_text_seq("TRANSACTION") 7099 characteristics = self._parse_csv( 7100 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7101 ) 7102 return self.expression( 7103 exp.SetItem, 7104 expressions=characteristics, 7105 kind="TRANSACTION", 7106 **{"global": global_}, # type: ignore 7107 ) 7108 7109 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7110 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7111 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7112 7113 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7114 index = self._index 7115 set_ = self.expression( 7116 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7117 ) 7118 7119 if self._curr: 7120 self._retreat(index) 7121 return self._parse_as_command(self._prev) 7122 7123 return set_ 7124 7125 def _parse_var_from_options( 7126 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7127 ) -> t.Optional[exp.Var]: 7128 start = self._curr 7129 if not start: 7130 return None 7131 7132 option = start.text.upper() 7133 continuations = options.get(option) 7134 7135 index = self._index 7136 self._advance() 7137 for keywords in continuations or []: 7138 if isinstance(keywords, str): 7139 keywords = (keywords,) 7140 7141 if self._match_text_seq(*keywords): 7142 option = f"{option} {' '.join(keywords)}" 7143 break 7144 else: 7145 if continuations or continuations is None: 7146 if raise_unmatched: 7147 self.raise_error(f"Unknown option {option}") 7148 7149 self._retreat(index) 7150 return None 7151 7152 return exp.var(option) 7153 7154 def _parse_as_command(self, start: Token) -> exp.Command: 7155 while self._curr: 7156 self._advance() 7157 text = self._find_sql(start, self._prev) 7158 size = len(start.text) 7159 self._warn_unsupported() 7160 return exp.Command(this=text[:size], expression=text[size:]) 7161 7162 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7163 settings = [] 7164 7165 self._match_l_paren() 7166 kind = self._parse_id_var() 7167 7168 if self._match(TokenType.L_PAREN): 7169 while True: 7170 key = self._parse_id_var() 7171 value = self._parse_primary() 7172 if not key and value is None: 7173 break 7174 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7175 self._match(TokenType.R_PAREN) 7176 7177 self._match_r_paren() 7178 7179 return self.expression( 7180 exp.DictProperty, 7181 this=this, 7182 kind=kind.this if kind else None, 7183 settings=settings, 7184 ) 7185 7186 def _parse_dict_range(self, this: str) -> exp.DictRange: 7187 self._match_l_paren() 7188 has_min = self._match_text_seq("MIN") 7189 if has_min: 7190 min = self._parse_var() or self._parse_primary() 7191 
self._match_text_seq("MAX") 7192 max = self._parse_var() or self._parse_primary() 7193 else: 7194 max = self._parse_var() or self._parse_primary() 7195 min = exp.Literal.number(0) 7196 self._match_r_paren() 7197 return self.expression(exp.DictRange, this=this, min=min, max=max) 7198 7199 def _parse_comprehension( 7200 self, this: t.Optional[exp.Expression] 7201 ) -> t.Optional[exp.Comprehension]: 7202 index = self._index 7203 expression = self._parse_column() 7204 if not self._match(TokenType.IN): 7205 self._retreat(index - 1) 7206 return None 7207 iterator = self._parse_column() 7208 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7209 return self.expression( 7210 exp.Comprehension, 7211 this=this, 7212 expression=expression, 7213 iterator=iterator, 7214 condition=condition, 7215 ) 7216 7217 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7218 if self._match(TokenType.HEREDOC_STRING): 7219 return self.expression(exp.Heredoc, this=self._prev.text) 7220 7221 if not self._match_text_seq("$"): 7222 return None 7223 7224 tags = ["$"] 7225 tag_text = None 7226 7227 if self._is_connected(): 7228 self._advance() 7229 tags.append(self._prev.text.upper()) 7230 else: 7231 self.raise_error("No closing $ found") 7232 7233 if tags[-1] != "$": 7234 if self._is_connected() and self._match_text_seq("$"): 7235 tag_text = tags[-1] 7236 tags.append("$") 7237 else: 7238 self.raise_error("No closing $ found") 7239 7240 heredoc_start = self._curr 7241 7242 while self._curr: 7243 if self._match_text_seq(*tags, advance=False): 7244 this = self._find_sql(heredoc_start, self._prev) 7245 self._advance(len(tags)) 7246 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7247 7248 self._advance() 7249 7250 self.raise_error(f"No closing {''.join(tags)} found") 7251 return None 7252 7253 def _find_parser( 7254 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7255 ) -> t.Optional[t.Callable]: 7256 if not self._curr: 7257 return None 7258 7259 index = self._index 7260 this = [] 7261 while True: 7262 # The current token might be multiple words 7263 curr = self._curr.text.upper() 7264 key = curr.split(" ") 7265 this.append(curr) 7266 7267 self._advance() 7268 result, trie = in_trie(trie, key) 7269 if result == TrieResult.FAILED: 7270 break 7271 7272 if result == TrieResult.EXISTS: 7273 subparser = parsers[" ".join(this)] 7274 return subparser 7275 7276 self._retreat(index) 7277 return None 7278 7279 def _match(self, token_type, advance=True, expression=None): 7280 if not self._curr: 7281 return None 7282 7283 if self._curr.token_type == token_type: 7284 if advance: 7285 self._advance() 7286 self._add_comments(expression) 7287 return True 7288 7289 return None 7290 7291 def _match_set(self, types, advance=True): 7292 if not self._curr: 7293 return None 7294 7295 if self._curr.token_type in types: 7296 if advance: 7297 self._advance() 7298 return True 7299 7300 return None 7301 7302 def _match_pair(self, token_type_a, token_type_b, advance=True): 7303 if not self._curr or not self._next: 7304 return None 7305 7306 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7307 if advance: 7308 self._advance(2) 7309 return True 7310 7311 return None 7312 7313 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7314 if not self._match(TokenType.L_PAREN, expression=expression): 7315 self.raise_error("Expecting (") 7316 7317 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7318 if not 
self._match(TokenType.R_PAREN, expression=expression): 7319 self.raise_error("Expecting )") 7320 7321 def _match_texts(self, texts, advance=True): 7322 if ( 7323 self._curr 7324 and self._curr.token_type != TokenType.STRING 7325 and self._curr.text.upper() in texts 7326 ): 7327 if advance: 7328 self._advance() 7329 return True 7330 return None 7331 7332 def _match_text_seq(self, *texts, advance=True): 7333 index = self._index 7334 for text in texts: 7335 if ( 7336 self._curr 7337 and self._curr.token_type != TokenType.STRING 7338 and self._curr.text.upper() == text 7339 ): 7340 self._advance() 7341 else: 7342 self._retreat(index) 7343 return None 7344 7345 if not advance: 7346 self._retreat(index) 7347 7348 return True 7349 7350 def _replace_lambda( 7351 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7352 ) -> t.Optional[exp.Expression]: 7353 if not node: 7354 return node 7355 7356 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7357 7358 for column in node.find_all(exp.Column): 7359 typ = lambda_types.get(column.parts[0].name) 7360 if typ is not None: 7361 dot_or_id = column.to_dot() if column.table else column.this 7362 7363 if typ: 7364 dot_or_id = self.expression( 7365 exp.Cast, 7366 this=dot_or_id, 7367 to=typ, 7368 ) 7369 7370 parent = column.parent 7371 7372 while isinstance(parent, exp.Dot): 7373 if not isinstance(parent.parent, exp.Dot): 7374 parent.replace(dot_or_id) 7375 break 7376 parent = parent.parent 7377 else: 7378 if column is node: 7379 node = dot_or_id 7380 else: 7381 column.replace(dot_or_id) 7382 return node 7383 7384 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7385 start = self._prev 7386 7387 # Not to be confused with TRUNCATE(number, decimals) function call 7388 if self._match(TokenType.L_PAREN): 7389 self._retreat(self._index - 2) 7390 return self._parse_function() 7391 7392 # Clickhouse supports TRUNCATE DATABASE as well 7393 is_database = self._match(TokenType.DATABASE) 7394 7395 self._match(TokenType.TABLE) 7396 7397 exists = self._parse_exists(not_=False) 7398 7399 expressions = self._parse_csv( 7400 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7401 ) 7402 7403 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7404 7405 if self._match_text_seq("RESTART", "IDENTITY"): 7406 identity = "RESTART" 7407 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7408 identity = "CONTINUE" 7409 else: 7410 identity = None 7411 7412 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7413 option = self._prev.text 7414 else: 7415 option = None 7416 7417 partition = self._parse_partition() 7418 7419 # Fallback case 7420 if self._curr: 7421 return self._parse_as_command(start) 7422 7423 return self.expression( 7424 exp.TruncateTable, 7425 expressions=expressions, 7426 is_database=is_database, 7427 exists=exists, 7428 cluster=cluster, 7429 identity=identity, 7430 option=option, 7431 partition=partition, 7432 ) 7433 7434 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7435 this = self._parse_ordered(self._parse_opclass) 7436 7437 if not self._match(TokenType.WITH): 7438 return this 7439 7440 op = self._parse_var(any_token=True) 7441 7442 return self.expression(exp.WithOperator, this=this, op=op) 7443 7444 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7445 self._match(TokenType.EQ) 7446 self._match(TokenType.L_PAREN) 7447 7448 opts: t.List[t.Optional[exp.Expression]] = [] 7449 while 
self._curr and not self._match(TokenType.R_PAREN): 7450 if self._match_text_seq("FORMAT_NAME", "="): 7451 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7452 # so we parse it separately to use _parse_field() 7453 prop = self.expression( 7454 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7455 ) 7456 opts.append(prop) 7457 else: 7458 opts.append(self._parse_property()) 7459 7460 self._match(TokenType.COMMA) 7461 7462 return opts 7463 7464 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7465 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7466 7467 options = [] 7468 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7469 option = self._parse_var(any_token=True) 7470 prev = self._prev.text.upper() 7471 7472 # Different dialects might separate options and values by white space, "=" and "AS" 7473 self._match(TokenType.EQ) 7474 self._match(TokenType.ALIAS) 7475 7476 param = self.expression(exp.CopyParameter, this=option) 7477 7478 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7479 TokenType.L_PAREN, advance=False 7480 ): 7481 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7482 param.set("expressions", self._parse_wrapped_options()) 7483 elif prev == "FILE_FORMAT": 7484 # T-SQL's external file format case 7485 param.set("expression", self._parse_field()) 7486 else: 7487 param.set("expression", self._parse_unquoted_field()) 7488 7489 options.append(param) 7490 self._match(sep) 7491 7492 return options 7493 7494 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7495 expr = self.expression(exp.Credentials) 7496 7497 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7498 expr.set("storage", self._parse_field()) 7499 if self._match_text_seq("CREDENTIALS"): 7500 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7501 creds = ( 7502 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7503 ) 7504 expr.set("credentials", creds) 7505 if self._match_text_seq("ENCRYPTION"): 7506 expr.set("encryption", self._parse_wrapped_options()) 7507 if self._match_text_seq("IAM_ROLE"): 7508 expr.set("iam_role", self._parse_field()) 7509 if self._match_text_seq("REGION"): 7510 expr.set("region", self._parse_field()) 7511 7512 return expr 7513 7514 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7515 return self._parse_field() 7516 7517 def _parse_copy(self) -> exp.Copy | exp.Command: 7518 start = self._prev 7519 7520 self._match(TokenType.INTO) 7521 7522 this = ( 7523 self._parse_select(nested=True, parse_subquery_alias=False) 7524 if self._match(TokenType.L_PAREN, advance=False) 7525 else self._parse_table(schema=True) 7526 ) 7527 7528 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7529 7530 files = self._parse_csv(self._parse_file_location) 7531 credentials = self._parse_credentials() 7532 7533 self._match_text_seq("WITH") 7534 7535 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7536 7537 # Fallback case 7538 if self._curr: 7539 return self._parse_as_command(start) 7540 7541 return self.expression( 7542 exp.Copy, 7543 this=this, 7544 kind=kind, 7545 credentials=credentials, 7546 files=files, 7547 params=params, 7548 ) 7549 7550 def _parse_normalize(self) -> exp.Normalize: 7551 return self.expression( 7552 exp.Normalize, 7553 this=self._parse_bitwise(), 7554 form=self._match(TokenType.COMMA) and self._parse_var(), 7555 ) 7556 7557 def _parse_star_ops(self) -> 
t.Optional[exp.Expression]: 7558 if self._match_text_seq("COLUMNS", "(", advance=False): 7559 this = self._parse_function() 7560 if isinstance(this, exp.Columns): 7561 this.set("unpack", True) 7562 return this 7563 7564 return self.expression( 7565 exp.Star, 7566 **{ # type: ignore 7567 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7568 "replace": self._parse_star_op("REPLACE"), 7569 "rename": self._parse_star_op("RENAME"), 7570 }, 7571 ) 7572 7573 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7574 privilege_parts = [] 7575 7576 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7577 # (end of privilege list) or L_PAREN (start of column list) are met 7578 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7579 privilege_parts.append(self._curr.text.upper()) 7580 self._advance() 7581 7582 this = exp.var(" ".join(privilege_parts)) 7583 expressions = ( 7584 self._parse_wrapped_csv(self._parse_column) 7585 if self._match(TokenType.L_PAREN, advance=False) 7586 else None 7587 ) 7588 7589 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7590 7591 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7592 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7593 principal = self._parse_id_var() 7594 7595 if not principal: 7596 return None 7597 7598 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7599 7600 def _parse_grant(self) -> exp.Grant | exp.Command: 7601 start = self._prev 7602 7603 privileges = self._parse_csv(self._parse_grant_privilege) 7604 7605 self._match(TokenType.ON) 7606 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7607 7608 # Attempt to parse the securable e.g. MySQL allows names 7609 # such as "foo.*", "*.*" which are not easily parseable yet 7610 securable = self._try_parse(self._parse_table_parts) 7611 7612 if not securable or not self._match_text_seq("TO"): 7613 return self._parse_as_command(start) 7614 7615 principals = self._parse_csv(self._parse_grant_principal) 7616 7617 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7618 7619 if self._curr: 7620 return self._parse_as_command(start) 7621 7622 return self.expression( 7623 exp.Grant, 7624 privileges=privileges, 7625 kind=kind, 7626 securable=securable, 7627 principals=principals, 7628 grant_option=grant_option, 7629 ) 7630 7631 def _parse_overlay(self) -> exp.Overlay: 7632 return self.expression( 7633 exp.Overlay, 7634 **{ # type: ignore 7635 "this": self._parse_bitwise(), 7636 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7637 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7638 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7639 }, 7640 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
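As a quick illustration of these constructor options, here is a minimal sketch (assuming only the public sqlglot package layout) that builds a parser which records up to five errors and logs them instead of raising on the first one:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect and log errors rather than raising immediately;
    # show 40 characters of context around each error location.
    parser = Parser(error_level=ErrorLevel.WARN, error_message_context=40, max_errors=5)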
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
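For example, feeding parse a token stream produced by the Tokenizer should yield one tree per statement. A minimal sketch (in practice most callers go through sqlglot.parse, which pairs the dialect-specific tokenizer and parser for you):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    tokens = Tokenizer().tokenize(sql)
    trees = Parser().parse(tokens, sql)
    print(len(trees))  # 2 -- one syntax tree per parsed statement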
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
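A hedged sketch of the fallback behavior, assuming both exp.Select and exp.Condition are registered in EXPRESSION_PARSERS (as they are in current sqlglot releases):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "x = 1 AND y = 2"
    tokens = Tokenizer().tokenize(sql)
    # Try SELECT first, then fall back to a bare condition; the first success wins.
    # Like parse(), this returns a list of expressions.
    trees = Parser().parse_into([exp.Select, exp.Condition], tokens, sql)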
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
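Under ErrorLevel.WARN the accumulated errors are logged; under ErrorLevel.RAISE they are merged into a single ParseError. A minimal sketch (check_errors is invoked internally at the end of parse, and the invalid SQL below is assumed to accumulate at least one error):

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)
    sql = "SELECT 1 +"  # trailing operator, assumed to fail to parse
    try:
        parser.parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        print(len(e.errors))  # the individual errors, merged into one ParseError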
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error to the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
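The structured fields that ParseError.new attaches (line, column, and the highlighted context around the offending token) can be inspected on the raised error. A sketch, assuming the query below fails to parse:

    from sqlglot import parse_one
    from sqlglot.errors import ParseError

    try:
        parse_one("SELECT 1 +")  # assumed to fail on the trailing operator
    except ParseError as e:
        info = e.errors[0]
        print(info["line"], info["col"])
        print(info["start_context"], info["highlight"], info["end_context"], sep="|")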
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
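Although this helper is primarily used internally by the _parse_* methods, a minimal sketch shows how a validated node is constructed (exp.column and exp.Literal.number are public expression helpers):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # Builds and validates an exp.EQ node: column "x" = literal 1
    node = parser.expression(
        exp.EQ,
        this=exp.column("x"),
        expression=exp.Literal.number(1),
    )
    print(node.sql())  # x = 1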
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
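A hedged sketch of validation catching a missing mandatory argument (exp.In is assumed to require its 'this' argument, per its arg_types; with the default ErrorLevel.IMMEDIATE, raise_error raises right away):

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    parser = Parser()  # default error level is IMMEDIATE, so validation raises
    incomplete = exp.In(expressions=[exp.Literal.number(1)])  # 'this' left unset
    try:
        parser.validate_expression(incomplete)
    except ParseError as e:
        print(e)  # reports the missing required argument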