sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)
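
# Illustrative usage (not part of the original module): a minimal sketch of how
# the builders above surface through sqlglot's public API. The exact trees and
# SQL shown are assumptions about the default dialect, not guarantees:
#
#     import sqlglot
#
#     # build_mod parenthesizes binary operands, per the comment in build_mod:
#     sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()   # 'SELECT (a + 1) % 7'
#
#     # build_logarithm resolves argument order via dialect.LOG_BASE_FIRST:
#     sqlglot.parse_one("SELECT LOG(2, 64)")            # exp.Log(this=2, expression=64)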


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
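
    # Illustrative sketch (not in the original source): dialect parsers extend
    # this table by subclassing and merging; ADD_ONE below is a hypothetical
    # function name used purely for illustration:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "ADD_ONE": lambda args: exp.Add(
    #                 this=seq_get(args, 0), expression=exp.Literal.number(1)
    #             ),
    #         }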

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
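
    # Illustrative note (not in the original source): because keywords such as
    # FIRST, FORMAT or LEFT appear in ID_VAR_TOKENS above, they can still be
    # parsed as plain identifiers. Assumed default-dialect behavior:
    #
    #     sqlglot.parse_one("SELECT first, format FROM t")
    #     # both `first` and `format` parse as exp.Column nodes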

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
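
    # Illustrative note (not in the original source): COLUMN_OPERATORS above
    # drives postfix column parsing such as casts and JSON extraction. Assumed
    # default-dialect behavior:
    #
    #     sqlglot.parse_one("SELECT data -> '$.name' FROM t")
    #     # `->` maps to exp.JSONExtract; `->>` maps to exp.JSONExtractScalar,
    #     # and `x::INT` maps to exp.Cast (or exp.TryCast if STRICT_CAST is off)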

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
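
    # Illustrative sketch (not in the original source): _parse_statement
    # (defined further below) dispatches on the first token of each statement
    # via STATEMENT_PARSERS, so a subclass can register new statement kinds;
    # _parse_show_command below is a hypothetical helper:
    #
    #     class MyParser(Parser):
    #         STATEMENT_PARSERS = {
    #             **Parser.STATEMENT_PARSERS,
    #             TokenType.SHOW: lambda self: self._parse_show_command(),
    #         }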

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
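
    # Illustrative note (not in the original source): CONSTRAINT_PARSERS above
    # handles column- and table-level constraints. Assumed default-dialect parse:
    #
    #     sqlglot.parse_one("CREATE TABLE t (x INT PRIMARY KEY CHECK (x > 0))")
    #     # PRIMARY KEY is handled by _parse_primary_key and CHECK (...) becomes
    #     # an exp.CheckColumnConstraint with the wrapped assignment as `this`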

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}
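
    # Illustrative note (not in the original source): FUNCTION_PARSERS above
    # covers functions whose argument grammar is non-standard (e.g. CAST's
    # `expr AS type`). Assumed default-dialect behavior:
    #
    #     sqlglot.parse_one("SELECT TRY_CAST(x AS INT) FROM t")
    #     # TRY_CAST routes through _parse_cast(False, safe=True) -> exp.TryCast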

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
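
    # Illustrative usage (not in the original source): a parser is normally
    # driven through a Dialect rather than instantiated directly. A sketch,
    # assuming the Dialect.tokenize and Dialect.parser helpers:
    #
    #     from sqlglot.dialects.dialect import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     tokens = dialect.tokenize("SELECT 1")
    #     parser = dialect.parser(error_level=ErrorLevel.RAISE)
    #     expressions = parser.parse(tokens, "SELECT 1")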

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
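
    # Illustrative note (not in the original source): with ErrorLevel.RAISE the
    # errors recorded while parsing a chunk are aggregated above and raised as
    # one ParseError capped at `max_errors`; with ErrorLevel.WARN they are only
    # logged. Assumed usage through the public API:
    #
    #     import sqlglot
    #     from sqlglot.errors import ErrorLevel
    #
    #     sqlglot.parse_one("SELECT * FROM", error_level=ErrorLevel.WARN)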

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
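
    # Illustrative note (not in the original source): statements whose leading
    # keyword is in the tokenizer's COMMANDS set fall back to _parse_command
    # above, which warns and wraps the raw text. Assumed default-dialect result:
    #
    #     sqlglot.parse_one("EXPLAIN SELECT 1")   # typically an exp.Command node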
self._prev 1704 temporary = self._match(TokenType.TEMPORARY) 1705 materialized = self._match_text_seq("MATERIALIZED") 1706 1707 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1708 if not kind: 1709 return self._parse_as_command(start) 1710 1711 concurrently = self._match_text_seq("CONCURRENTLY") 1712 if_exists = exists or self._parse_exists() 1713 table = self._parse_table_parts( 1714 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1715 ) 1716 1717 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1718 1719 if self._match(TokenType.L_PAREN, advance=False): 1720 expressions = self._parse_wrapped_csv(self._parse_types) 1721 else: 1722 expressions = None 1723 1724 return self.expression( 1725 exp.Drop, 1726 comments=start.comments, 1727 exists=if_exists, 1728 this=table, 1729 expressions=expressions, 1730 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1731 temporary=temporary, 1732 materialized=materialized, 1733 cascade=self._match_text_seq("CASCADE"), 1734 constraints=self._match_text_seq("CONSTRAINTS"), 1735 purge=self._match_text_seq("PURGE"), 1736 cluster=cluster, 1737 concurrently=concurrently, 1738 ) 1739 1740 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1741 return ( 1742 self._match_text_seq("IF") 1743 and (not not_ or self._match(TokenType.NOT)) 1744 and self._match(TokenType.EXISTS) 1745 ) 1746 1747 def _parse_create(self) -> exp.Create | exp.Command: 1748 # Note: this can't be None because we've matched a statement parser 1749 start = self._prev 1750 comments = self._prev_comments 1751 1752 replace = ( 1753 start.token_type == TokenType.REPLACE 1754 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1755 or self._match_pair(TokenType.OR, TokenType.ALTER) 1756 ) 1757 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1758 1759 unique = self._match(TokenType.UNIQUE) 1760 1761 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1762 clustered = True 1763 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1764 "COLUMNSTORE" 1765 ): 1766 clustered = False 1767 else: 1768 clustered = None 1769 1770 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1771 self._advance() 1772 1773 properties = None 1774 create_token = self._match_set(self.CREATABLES) and self._prev 1775 1776 if not create_token: 1777 # exp.Properties.Location.POST_CREATE 1778 properties = self._parse_properties() 1779 create_token = self._match_set(self.CREATABLES) and self._prev 1780 1781 if not properties or not create_token: 1782 return self._parse_as_command(start) 1783 1784 concurrently = self._match_text_seq("CONCURRENTLY") 1785 exists = self._parse_exists(not_=True) 1786 this = None 1787 expression: t.Optional[exp.Expression] = None 1788 indexes = None 1789 no_schema_binding = None 1790 begin = None 1791 end = None 1792 clone = None 1793 1794 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1795 nonlocal properties 1796 if properties and temp_props: 1797 properties.expressions.extend(temp_props.expressions) 1798 elif temp_props: 1799 properties = temp_props 1800 1801 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1802 this = self._parse_user_defined_function(kind=create_token.token_type) 1803 1804 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1805 extend_props(self._parse_properties()) 1806 1807 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1808 
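# Example (illustrative): the surrounding _parse_create method is reached
# through the public entry points; a minimal usage sketch, assuming only the
# documented sqlglot API (parse_one and the exp module):
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("CREATE TABLE db.t (c INT)")
#     assert isinstance(ast, exp.Create)
#     assert ast.args["kind"] == "TABLE"
#     assert ast.sql() == "CREATE TABLE db.t (c INT)"  # round trip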
extend_props(self._parse_properties()) 1809 1810 if not expression: 1811 if self._match(TokenType.COMMAND): 1812 expression = self._parse_as_command(self._prev) 1813 else: 1814 begin = self._match(TokenType.BEGIN) 1815 return_ = self._match_text_seq("RETURN") 1816 1817 if self._match(TokenType.STRING, advance=False): 1818 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1819 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1820 expression = self._parse_string() 1821 extend_props(self._parse_properties()) 1822 else: 1823 expression = self._parse_statement() 1824 1825 end = self._match_text_seq("END") 1826 1827 if return_: 1828 expression = self.expression(exp.Return, this=expression) 1829 elif create_token.token_type == TokenType.INDEX: 1830 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1831 if not self._match(TokenType.ON): 1832 index = self._parse_id_var() 1833 anonymous = False 1834 else: 1835 index = None 1836 anonymous = True 1837 1838 this = self._parse_index(index=index, anonymous=anonymous) 1839 elif create_token.token_type in self.DB_CREATABLES: 1840 table_parts = self._parse_table_parts( 1841 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1842 ) 1843 1844 # exp.Properties.Location.POST_NAME 1845 self._match(TokenType.COMMA) 1846 extend_props(self._parse_properties(before=True)) 1847 1848 this = self._parse_schema(this=table_parts) 1849 1850 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1851 extend_props(self._parse_properties()) 1852 1853 self._match(TokenType.ALIAS) 1854 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1855 # exp.Properties.Location.POST_ALIAS 1856 extend_props(self._parse_properties()) 1857 1858 if create_token.token_type == TokenType.SEQUENCE: 1859 expression = self._parse_types() 1860 extend_props(self._parse_properties()) 1861 else: 1862 expression = self._parse_ddl_select() 1863 1864 if create_token.token_type == TokenType.TABLE: 1865 # exp.Properties.Location.POST_EXPRESSION 1866 extend_props(self._parse_properties()) 1867 1868 indexes = [] 1869 while True: 1870 index = self._parse_index() 1871 1872 # exp.Properties.Location.POST_INDEX 1873 extend_props(self._parse_properties()) 1874 if not index: 1875 break 1876 else: 1877 self._match(TokenType.COMMA) 1878 indexes.append(index) 1879 elif create_token.token_type == TokenType.VIEW: 1880 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1881 no_schema_binding = True 1882 1883 shallow = self._match_text_seq("SHALLOW") 1884 1885 if self._match_texts(self.CLONE_KEYWORDS): 1886 copy = self._prev.text.lower() == "copy" 1887 clone = self.expression( 1888 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1889 ) 1890 1891 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1892 return self._parse_as_command(start) 1893 1894 create_kind_text = create_token.text.upper() 1895 return self.expression( 1896 exp.Create, 1897 comments=comments, 1898 this=this, 1899 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1900 replace=replace, 1901 refresh=refresh, 1902 unique=unique, 1903 expression=expression, 1904 exists=exists, 1905 properties=properties, 1906 indexes=indexes, 1907 no_schema_binding=no_schema_binding, 1908 begin=begin, 1909 end=end, 1910 clone=clone, 1911 concurrently=concurrently, 1912 clustered=clustered, 1913 ) 1914 1915 def
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1916 seq = exp.SequenceProperties() 1917 1918 options = [] 1919 index = self._index 1920 1921 while self._curr: 1922 self._match(TokenType.COMMA) 1923 if self._match_text_seq("INCREMENT"): 1924 self._match_text_seq("BY") 1925 self._match_text_seq("=") 1926 seq.set("increment", self._parse_term()) 1927 elif self._match_text_seq("MINVALUE"): 1928 seq.set("minvalue", self._parse_term()) 1929 elif self._match_text_seq("MAXVALUE"): 1930 seq.set("maxvalue", self._parse_term()) 1931 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1932 self._match_text_seq("=") 1933 seq.set("start", self._parse_term()) 1934 elif self._match_text_seq("CACHE"): 1935 # T-SQL allows empty CACHE which is initialized dynamically 1936 seq.set("cache", self._parse_number() or True) 1937 elif self._match_text_seq("OWNED", "BY"): 1938 # "OWNED BY NONE" is the default 1939 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1940 else: 1941 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1942 if opt: 1943 options.append(opt) 1944 else: 1945 break 1946 1947 seq.set("options", options if options else None) 1948 return None if self._index == index else seq 1949 1950 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1951 # only used for teradata currently 1952 self._match(TokenType.COMMA) 1953 1954 kwargs = { 1955 "no": self._match_text_seq("NO"), 1956 "dual": self._match_text_seq("DUAL"), 1957 "before": self._match_text_seq("BEFORE"), 1958 "default": self._match_text_seq("DEFAULT"), 1959 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1960 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1961 "after": self._match_text_seq("AFTER"), 1962 "minimum": self._match_texts(("MIN", "MINIMUM")), 1963 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1964 } 1965 1966 if self._match_texts(self.PROPERTY_PARSERS): 1967 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1968 try: 1969 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1970 except TypeError: 1971 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1972 1973 return None 1974 1975 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1976 return self._parse_wrapped_csv(self._parse_property) 1977 1978 def _parse_property(self) -> t.Optional[exp.Expression]: 1979 if self._match_texts(self.PROPERTY_PARSERS): 1980 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1981 1982 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1983 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1984 1985 if self._match_text_seq("COMPOUND", "SORTKEY"): 1986 return self._parse_sortkey(compound=True) 1987 1988 if self._match_text_seq("SQL", "SECURITY"): 1989 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1990 1991 index = self._index 1992 key = self._parse_column() 1993 1994 if not self._match(TokenType.EQ): 1995 self._retreat(index) 1996 return self._parse_sequence_properties() 1997 1998 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1999 if isinstance(key, exp.Column): 2000 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2001 2002 value = self._parse_bitwise() or self._parse_var(any_token=True) 2003 2004 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2005 if 
isinstance(value, exp.Column): 2006 value = exp.var(value.name) 2007 2008 return self.expression(exp.Property, this=key, value=value) 2009 2010 def _parse_stored(self) -> exp.FileFormatProperty: 2011 self._match(TokenType.ALIAS) 2012 2013 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2014 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2015 2016 return self.expression( 2017 exp.FileFormatProperty, 2018 this=( 2019 self.expression( 2020 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2021 ) 2022 if input_format or output_format 2023 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2024 ), 2025 ) 2026 2027 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2028 field = self._parse_field() 2029 if isinstance(field, exp.Identifier) and not field.quoted: 2030 field = exp.var(field) 2031 2032 return field 2033 2034 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2035 self._match(TokenType.EQ) 2036 self._match(TokenType.ALIAS) 2037 2038 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2039 2040 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2041 properties = [] 2042 while True: 2043 if before: 2044 prop = self._parse_property_before() 2045 else: 2046 prop = self._parse_property() 2047 if not prop: 2048 break 2049 for p in ensure_list(prop): 2050 properties.append(p) 2051 2052 if properties: 2053 return self.expression(exp.Properties, expressions=properties) 2054 2055 return None 2056 2057 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2058 return self.expression( 2059 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2060 ) 2061 2062 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2063 if self._match_texts(("DEFINER", "INVOKER")): 2064 security_specifier = self._prev.text.upper() 2065 return self.expression(exp.SecurityProperty, this=security_specifier) 2066 return None 2067 2068 def _parse_settings_property(self) -> exp.SettingsProperty: 2069 return self.expression( 2070 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2071 ) 2072 2073 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2074 if self._index >= 2: 2075 pre_volatile_token = self._tokens[self._index - 2] 2076 else: 2077 pre_volatile_token = None 2078 2079 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2080 return exp.VolatileProperty() 2081 2082 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2083 2084 def _parse_retention_period(self) -> exp.Var: 2085 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2086 number = self._parse_number() 2087 number_str = f"{number} " if number else "" 2088 unit = self._parse_var(any_token=True) 2089 return exp.var(f"{number_str}{unit}") 2090 2091 def _parse_system_versioning_property( 2092 self, with_: bool = False 2093 ) -> exp.WithSystemVersioningProperty: 2094 self._match(TokenType.EQ) 2095 prop = self.expression( 2096 exp.WithSystemVersioningProperty, 2097 **{ # type: ignore 2098 "on": True, 2099 "with": with_, 2100 }, 2101 ) 2102 2103 if self._match_text_seq("OFF"): 2104 prop.set("on", False) 2105 return prop 2106 2107 self._match(TokenType.ON) 2108 if self._match(TokenType.L_PAREN): 2109 while self._curr and not 
self._match(TokenType.R_PAREN): 2110 if self._match_text_seq("HISTORY_TABLE", "="): 2111 prop.set("this", self._parse_table_parts()) 2112 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2113 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2114 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2115 prop.set("retention_period", self._parse_retention_period()) 2116 2117 self._match(TokenType.COMMA) 2118 2119 return prop 2120 2121 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2122 self._match(TokenType.EQ) 2123 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2124 prop = self.expression(exp.DataDeletionProperty, on=on) 2125 2126 if self._match(TokenType.L_PAREN): 2127 while self._curr and not self._match(TokenType.R_PAREN): 2128 if self._match_text_seq("FILTER_COLUMN", "="): 2129 prop.set("filter_column", self._parse_column()) 2130 elif self._match_text_seq("RETENTION_PERIOD", "="): 2131 prop.set("retention_period", self._parse_retention_period()) 2132 2133 self._match(TokenType.COMMA) 2134 2135 return prop 2136 2137 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2138 kind = "HASH" 2139 expressions: t.Optional[t.List[exp.Expression]] = None 2140 if self._match_text_seq("BY", "HASH"): 2141 expressions = self._parse_wrapped_csv(self._parse_id_var) 2142 elif self._match_text_seq("BY", "RANDOM"): 2143 kind = "RANDOM" 2144 2145 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2146 buckets: t.Optional[exp.Expression] = None 2147 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2148 buckets = self._parse_number() 2149 2150 return self.expression( 2151 exp.DistributedByProperty, 2152 expressions=expressions, 2153 kind=kind, 2154 buckets=buckets, 2155 order=self._parse_order(), 2156 ) 2157 2158 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2159 self._match_text_seq("KEY") 2160 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2161 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2162 2163 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2164 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2165 prop = self._parse_system_versioning_property(with_=True) 2166 self._match_r_paren() 2167 return prop 2168 2169 if self._match(TokenType.L_PAREN, advance=False): 2170 return self._parse_wrapped_properties() 2171 2172 if self._match_text_seq("JOURNAL"): 2173 return self._parse_withjournaltable() 2174 2175 if self._match_texts(self.VIEW_ATTRIBUTES): 2176 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2177 2178 if self._match_text_seq("DATA"): 2179 return self._parse_withdata(no=False) 2180 elif self._match_text_seq("NO", "DATA"): 2181 return self._parse_withdata(no=True) 2182 2183 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2184 return self._parse_serde_properties(with_=True) 2185 2186 if self._match(TokenType.SCHEMA): 2187 return self.expression( 2188 exp.WithSchemaBindingProperty, 2189 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2190 ) 2191 2192 if not self._next: 2193 return None 2194 2195 return self._parse_withisolatedloading() 2196 2197 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2198 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2199 self._match(TokenType.EQ) 2200 2201 user = self._parse_id_var() 2202 self._match(TokenType.PARAMETER) 2203 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2204 2205 if not user or not host: 2206 return None 2207 2208 return exp.DefinerProperty(this=f"{user}@{host}") 2209 2210 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2211 self._match(TokenType.TABLE) 2212 self._match(TokenType.EQ) 2213 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2214 2215 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2216 return self.expression(exp.LogProperty, no=no) 2217 2218 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2219 return self.expression(exp.JournalProperty, **kwargs) 2220 2221 def _parse_checksum(self) -> exp.ChecksumProperty: 2222 self._match(TokenType.EQ) 2223 2224 on = None 2225 if self._match(TokenType.ON): 2226 on = True 2227 elif self._match_text_seq("OFF"): 2228 on = False 2229 2230 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2231 2232 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2233 return self.expression( 2234 exp.Cluster, 2235 expressions=( 2236 self._parse_wrapped_csv(self._parse_ordered) 2237 if wrapped 2238 else self._parse_csv(self._parse_ordered) 2239 ), 2240 ) 2241 2242 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2243 self._match_text_seq("BY") 2244 2245 self._match_l_paren() 2246 expressions = self._parse_csv(self._parse_column) 2247 self._match_r_paren() 2248 2249 if self._match_text_seq("SORTED", "BY"): 2250 self._match_l_paren() 2251 sorted_by = self._parse_csv(self._parse_ordered) 2252 self._match_r_paren() 2253 else: 2254 sorted_by = None 2255 2256 self._match(TokenType.INTO) 2257 buckets = self._parse_number() 2258 self._match_text_seq("BUCKETS") 2259 2260 return self.expression( 2261 exp.ClusteredByProperty, 2262 expressions=expressions, 2263 sorted_by=sorted_by, 2264 buckets=buckets, 2265 ) 2266 2267 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2268 if not self._match_text_seq("GRANTS"): 2269 self._retreat(self._index - 1) 2270 return None 2271 2272 return self.expression(exp.CopyGrantsProperty) 2273 2274 def _parse_freespace(self) -> exp.FreespaceProperty: 2275 self._match(TokenType.EQ) 2276 return self.expression( 2277 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2278 ) 2279 2280 def _parse_mergeblockratio( 2281 self, no: bool = False, default: bool = False 2282 ) -> exp.MergeBlockRatioProperty: 2283 if self._match(TokenType.EQ): 2284 return self.expression( 2285 exp.MergeBlockRatioProperty, 2286 this=self._parse_number(), 2287 percent=self._match(TokenType.PERCENT), 2288 ) 2289 2290 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2291 2292 def _parse_datablocksize( 2293 self, 2294 default: t.Optional[bool] = None, 2295 minimum: t.Optional[bool] = None, 2296 maximum: t.Optional[bool] = None, 2297 ) -> exp.DataBlocksizeProperty: 2298 self._match(TokenType.EQ) 2299 size = self._parse_number() 2300 2301 units = None 2302 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2303 units = self._prev.text 2304 2305 return self.expression( 2306 exp.DataBlocksizeProperty, 2307 size=size, 2308 units=units, 2309 default=default, 2310 minimum=minimum, 2311 maximum=maximum, 2312 ) 2313 2314 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2315 self._match(TokenType.EQ) 2316 always = self._match_text_seq("ALWAYS") 2317 manual = self._match_text_seq("MANUAL") 2318 never = 
self._match_text_seq("NEVER") 2319 default = self._match_text_seq("DEFAULT") 2320 2321 autotemp = None 2322 if self._match_text_seq("AUTOTEMP"): 2323 autotemp = self._parse_schema() 2324 2325 return self.expression( 2326 exp.BlockCompressionProperty, 2327 always=always, 2328 manual=manual, 2329 never=never, 2330 default=default, 2331 autotemp=autotemp, 2332 ) 2333 2334 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2335 index = self._index 2336 no = self._match_text_seq("NO") 2337 concurrent = self._match_text_seq("CONCURRENT") 2338 2339 if not self._match_text_seq("ISOLATED", "LOADING"): 2340 self._retreat(index) 2341 return None 2342 2343 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2344 return self.expression( 2345 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2346 ) 2347 2348 def _parse_locking(self) -> exp.LockingProperty: 2349 if self._match(TokenType.TABLE): 2350 kind = "TABLE" 2351 elif self._match(TokenType.VIEW): 2352 kind = "VIEW" 2353 elif self._match(TokenType.ROW): 2354 kind = "ROW" 2355 elif self._match_text_seq("DATABASE"): 2356 kind = "DATABASE" 2357 else: 2358 kind = None 2359 2360 if kind in ("DATABASE", "TABLE", "VIEW"): 2361 this = self._parse_table_parts() 2362 else: 2363 this = None 2364 2365 if self._match(TokenType.FOR): 2366 for_or_in = "FOR" 2367 elif self._match(TokenType.IN): 2368 for_or_in = "IN" 2369 else: 2370 for_or_in = None 2371 2372 if self._match_text_seq("ACCESS"): 2373 lock_type = "ACCESS" 2374 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2375 lock_type = "EXCLUSIVE" 2376 elif self._match_text_seq("SHARE"): 2377 lock_type = "SHARE" 2378 elif self._match_text_seq("READ"): 2379 lock_type = "READ" 2380 elif self._match_text_seq("WRITE"): 2381 lock_type = "WRITE" 2382 elif self._match_text_seq("CHECKSUM"): 2383 lock_type = "CHECKSUM" 2384 else: 2385 lock_type = None 2386 2387 override = self._match_text_seq("OVERRIDE") 2388 2389 return self.expression( 2390 exp.LockingProperty, 2391 this=this, 2392 kind=kind, 2393 for_or_in=for_or_in, 2394 lock_type=lock_type, 2395 override=override, 2396 ) 2397 2398 def _parse_partition_by(self) -> t.List[exp.Expression]: 2399 if self._match(TokenType.PARTITION_BY): 2400 return self._parse_csv(self._parse_assignment) 2401 return [] 2402 2403 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2404 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2405 if self._match_text_seq("MINVALUE"): 2406 return exp.var("MINVALUE") 2407 if self._match_text_seq("MAXVALUE"): 2408 return exp.var("MAXVALUE") 2409 return self._parse_bitwise() 2410 2411 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2412 expression = None 2413 from_expressions = None 2414 to_expressions = None 2415 2416 if self._match(TokenType.IN): 2417 this = self._parse_wrapped_csv(self._parse_bitwise) 2418 elif self._match(TokenType.FROM): 2419 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2420 self._match_text_seq("TO") 2421 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2422 elif self._match_text_seq("WITH", "(", "MODULUS"): 2423 this = self._parse_number() 2424 self._match_text_seq(",", "REMAINDER") 2425 expression = self._parse_number() 2426 self._match_r_paren() 2427 else: 2428 self.raise_error("Failed to parse partition bound spec.") 2429 2430 return self.expression( 2431 exp.PartitionBoundSpec, 2432 this=this, 2433 expression=expression, 2434 
from_expressions=from_expressions, 2435 to_expressions=to_expressions, 2436 ) 2437 2438 # https://www.postgresql.org/docs/current/sql-createtable.html 2439 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2440 if not self._match_text_seq("OF"): 2441 self._retreat(self._index - 1) 2442 return None 2443 2444 this = self._parse_table(schema=True) 2445 2446 if self._match(TokenType.DEFAULT): 2447 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2448 elif self._match_text_seq("FOR", "VALUES"): 2449 expression = self._parse_partition_bound_spec() 2450 else: 2451 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2452 2453 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2454 2455 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2456 self._match(TokenType.EQ) 2457 return self.expression( 2458 exp.PartitionedByProperty, 2459 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2460 ) 2461 2462 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2463 if self._match_text_seq("AND", "STATISTICS"): 2464 statistics = True 2465 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2466 statistics = False 2467 else: 2468 statistics = None 2469 2470 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2471 2472 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2473 if self._match_text_seq("SQL"): 2474 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2475 return None 2476 2477 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2478 if self._match_text_seq("SQL", "DATA"): 2479 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2480 return None 2481 2482 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2483 if self._match_text_seq("PRIMARY", "INDEX"): 2484 return exp.NoPrimaryIndexProperty() 2485 if self._match_text_seq("SQL"): 2486 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2487 return None 2488 2489 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2490 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2491 return exp.OnCommitProperty() 2492 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2493 return exp.OnCommitProperty(delete=True) 2494 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2495 2496 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2497 if self._match_text_seq("SQL", "DATA"): 2498 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2499 return None 2500 2501 def _parse_distkey(self) -> exp.DistKeyProperty: 2502 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2503 2504 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2505 table = self._parse_table(schema=True) 2506 2507 options = [] 2508 while self._match_texts(("INCLUDING", "EXCLUDING")): 2509 this = self._prev.text.upper() 2510 2511 id_var = self._parse_id_var() 2512 if not id_var: 2513 return None 2514 2515 options.append( 2516 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2517 ) 2518 2519 return self.expression(exp.LikeProperty, this=table, expressions=options) 2520 2521 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2522 return self.expression( 2523 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2524 ) 2525 2526 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2527 self._match(TokenType.EQ) 2528 return self.expression( 2529 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2530 ) 2531 2532 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2533 self._match_text_seq("WITH", "CONNECTION") 2534 return self.expression( 2535 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2536 ) 2537 2538 def _parse_returns(self) -> exp.ReturnsProperty: 2539 value: t.Optional[exp.Expression] 2540 null = None 2541 is_table = self._match(TokenType.TABLE) 2542 2543 if is_table: 2544 if self._match(TokenType.LT): 2545 value = self.expression( 2546 exp.Schema, 2547 this="TABLE", 2548 expressions=self._parse_csv(self._parse_struct_types), 2549 ) 2550 if not self._match(TokenType.GT): 2551 self.raise_error("Expecting >") 2552 else: 2553 value = self._parse_schema(exp.var("TABLE")) 2554 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2555 null = True 2556 value = None 2557 else: 2558 value = self._parse_types() 2559 2560 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2561 2562 def _parse_describe(self) -> exp.Describe: 2563 kind = self._match_set(self.CREATABLES) and self._prev.text 2564 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2565 if self._match(TokenType.DOT): 2566 style = None 2567 self._retreat(self._index - 2) 2568 this = self._parse_table(schema=True) 2569 properties = self._parse_properties() 2570 expressions = properties.expressions if properties else None 2571 partition = self._parse_partition() 2572 return self.expression( 2573 exp.Describe, 2574 this=this, 2575 style=style, 2576 kind=kind, 2577 expressions=expressions, 2578 partition=partition, 2579 ) 2580 2581 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2582 kind = self._prev.text.upper() 2583 expressions = [] 2584 2585 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2586 if self._match(TokenType.WHEN): 2587 expression = self._parse_disjunction() 2588 self._match(TokenType.THEN) 2589 else: 2590 expression = None 2591 2592 else_ = self._match(TokenType.ELSE) 2593 2594 if not self._match(TokenType.INTO): 2595 return None 2596 2597 return self.expression( 2598 exp.ConditionalInsert, 2599 this=self.expression( 2600 exp.Insert, 2601 this=self._parse_table(schema=True), 2602 expression=self._parse_derived_table_values(), 2603 ), 2604 expression=expression, 2605 else_=else_, 2606 ) 2607 2608 expression = parse_conditional_insert() 2609 while expression is not None: 2610 expressions.append(expression) 2611 expression = parse_conditional_insert() 2612 2613 return self.expression( 2614 exp.MultitableInserts, 2615 kind=kind, 2616 comments=comments, 2617 expressions=expressions, 2618 source=self._parse_table(), 2619 ) 2620 2621 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2622 comments = ensure_list(self._prev_comments) 2623 hint = self._parse_hint() 2624 overwrite = self._match(TokenType.OVERWRITE) 2625 ignore = self._match(TokenType.IGNORE) 2626 local = self._match_text_seq("LOCAL") 2627 alternative = None 2628 is_function = None 2629 2630 if self._match_text_seq("DIRECTORY"): 2631 this: t.Optional[exp.Expression] = self.expression( 2632 exp.Directory, 2633 this=self._parse_var_or_string(), 2634 local=local, 2635 row_format=self._parse_row_format(match_row=True), 2636 ) 
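# Example (illustrative): anything other than INSERT ... DIRECTORY falls
# through to the else branch below; a minimal sketch, assuming only the
# public sqlglot API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ins = sqlglot.parse_one("INSERT INTO t (a, b) VALUES (1, 2)")
#     assert isinstance(ins, exp.Insert)
#     assert isinstance(ins.expression, exp.Values)  # the parsed derived table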
2637 else: 2638 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2639 comments += ensure_list(self._prev_comments) 2640 return self._parse_multitable_inserts(comments) 2641 2642 if self._match(TokenType.OR): 2643 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2644 2645 self._match(TokenType.INTO) 2646 comments += ensure_list(self._prev_comments) 2647 self._match(TokenType.TABLE) 2648 is_function = self._match(TokenType.FUNCTION) 2649 2650 this = ( 2651 self._parse_table(schema=True, parse_partition=True) 2652 if not is_function 2653 else self._parse_function() 2654 ) 2655 2656 returning = self._parse_returning() 2657 2658 return self.expression( 2659 exp.Insert, 2660 comments=comments, 2661 hint=hint, 2662 is_function=is_function, 2663 this=this, 2664 stored=self._match_text_seq("STORED") and self._parse_stored(), 2665 by_name=self._match_text_seq("BY", "NAME"), 2666 exists=self._parse_exists(), 2667 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2668 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2669 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2670 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2671 conflict=self._parse_on_conflict(), 2672 returning=returning or self._parse_returning(), 2673 overwrite=overwrite, 2674 alternative=alternative, 2675 ignore=ignore, 2676 source=self._match(TokenType.TABLE) and self._parse_table(), 2677 ) 2678 2679 def _parse_kill(self) -> exp.Kill: 2680 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2681 2682 return self.expression( 2683 exp.Kill, 2684 this=self._parse_primary(), 2685 kind=kind, 2686 ) 2687 2688 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2689 conflict = self._match_text_seq("ON", "CONFLICT") 2690 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2691 2692 if not conflict and not duplicate: 2693 return None 2694 2695 conflict_keys = None 2696 constraint = None 2697 2698 if conflict: 2699 if self._match_text_seq("ON", "CONSTRAINT"): 2700 constraint = self._parse_id_var() 2701 elif self._match(TokenType.L_PAREN): 2702 conflict_keys = self._parse_csv(self._parse_id_var) 2703 self._match_r_paren() 2704 2705 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2706 if self._prev.token_type == TokenType.UPDATE: 2707 self._match(TokenType.SET) 2708 expressions = self._parse_csv(self._parse_equality) 2709 else: 2710 expressions = None 2711 2712 return self.expression( 2713 exp.OnConflict, 2714 duplicate=duplicate, 2715 expressions=expressions, 2716 action=action, 2717 conflict_keys=conflict_keys, 2718 constraint=constraint, 2719 ) 2720 2721 def _parse_returning(self) -> t.Optional[exp.Returning]: 2722 if not self._match(TokenType.RETURNING): 2723 return None 2724 return self.expression( 2725 exp.Returning, 2726 expressions=self._parse_csv(self._parse_expression), 2727 into=self._match(TokenType.INTO) and self._parse_table_part(), 2728 ) 2729 2730 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2731 if not self._match(TokenType.FORMAT): 2732 return None 2733 return self._parse_row_format() 2734 2735 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2736 index = self._index 2737 with_ = with_ or self._match_text_seq("WITH") 2738 2739 if not self._match(TokenType.SERDE_PROPERTIES): 2740 self._retreat(index) 2741 return 
None 2742 return self.expression( 2743 exp.SerdeProperties, 2744 **{ # type: ignore 2745 "expressions": self._parse_wrapped_properties(), 2746 "with": with_, 2747 }, 2748 ) 2749 2750 def _parse_row_format( 2751 self, match_row: bool = False 2752 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2753 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2754 return None 2755 2756 if self._match_text_seq("SERDE"): 2757 this = self._parse_string() 2758 2759 serde_properties = self._parse_serde_properties() 2760 2761 return self.expression( 2762 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2763 ) 2764 2765 self._match_text_seq("DELIMITED") 2766 2767 kwargs = {} 2768 2769 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2770 kwargs["fields"] = self._parse_string() 2771 if self._match_text_seq("ESCAPED", "BY"): 2772 kwargs["escaped"] = self._parse_string() 2773 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2774 kwargs["collection_items"] = self._parse_string() 2775 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2776 kwargs["map_keys"] = self._parse_string() 2777 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2778 kwargs["lines"] = self._parse_string() 2779 if self._match_text_seq("NULL", "DEFINED", "AS"): 2780 kwargs["null"] = self._parse_string() 2781 2782 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2783 2784 def _parse_load(self) -> exp.LoadData | exp.Command: 2785 if self._match_text_seq("DATA"): 2786 local = self._match_text_seq("LOCAL") 2787 self._match_text_seq("INPATH") 2788 inpath = self._parse_string() 2789 overwrite = self._match(TokenType.OVERWRITE) 2790 self._match_pair(TokenType.INTO, TokenType.TABLE) 2791 2792 return self.expression( 2793 exp.LoadData, 2794 this=self._parse_table(schema=True), 2795 local=local, 2796 overwrite=overwrite, 2797 inpath=inpath, 2798 partition=self._parse_partition(), 2799 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2800 serde=self._match_text_seq("SERDE") and self._parse_string(), 2801 ) 2802 return self._parse_as_command(self._prev) 2803 2804 def _parse_delete(self) -> exp.Delete: 2805 # This handles MySQL's "Multiple-Table Syntax" 2806 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2807 tables = None 2808 comments = self._prev_comments 2809 if not self._match(TokenType.FROM, advance=False): 2810 tables = self._parse_csv(self._parse_table) or None 2811 2812 returning = self._parse_returning() 2813 2814 return self.expression( 2815 exp.Delete, 2816 comments=comments, 2817 tables=tables, 2818 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2819 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2820 where=self._parse_where(), 2821 returning=returning or self._parse_returning(), 2822 limit=self._parse_limit(), 2823 ) 2824 2825 def _parse_update(self) -> exp.Update: 2826 comments = self._prev_comments 2827 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2828 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2829 returning = self._parse_returning() 2830 return self.expression( 2831 exp.Update, 2832 comments=comments, 2833 **{ # type: ignore 2834 "this": this, 2835 "expressions": expressions, 2836 "from": self._parse_from(joins=True), 2837 "where": self._parse_where(), 2838 "returning": returning or self._parse_returning(), 2839 "order": self._parse_order(), 2840 
"limit": self._parse_limit(), 2841 }, 2842 ) 2843 2844 def _parse_uncache(self) -> exp.Uncache: 2845 if not self._match(TokenType.TABLE): 2846 self.raise_error("Expecting TABLE after UNCACHE") 2847 2848 return self.expression( 2849 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2850 ) 2851 2852 def _parse_cache(self) -> exp.Cache: 2853 lazy = self._match_text_seq("LAZY") 2854 self._match(TokenType.TABLE) 2855 table = self._parse_table(schema=True) 2856 2857 options = [] 2858 if self._match_text_seq("OPTIONS"): 2859 self._match_l_paren() 2860 k = self._parse_string() 2861 self._match(TokenType.EQ) 2862 v = self._parse_string() 2863 options = [k, v] 2864 self._match_r_paren() 2865 2866 self._match(TokenType.ALIAS) 2867 return self.expression( 2868 exp.Cache, 2869 this=table, 2870 lazy=lazy, 2871 options=options, 2872 expression=self._parse_select(nested=True), 2873 ) 2874 2875 def _parse_partition(self) -> t.Optional[exp.Partition]: 2876 if not self._match(TokenType.PARTITION): 2877 return None 2878 2879 return self.expression( 2880 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2881 ) 2882 2883 def _parse_value(self) -> t.Optional[exp.Tuple]: 2884 if self._match(TokenType.L_PAREN): 2885 expressions = self._parse_csv(self._parse_expression) 2886 self._match_r_paren() 2887 return self.expression(exp.Tuple, expressions=expressions) 2888 2889 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2890 expression = self._parse_expression() 2891 if expression: 2892 return self.expression(exp.Tuple, expressions=[expression]) 2893 return None 2894 2895 def _parse_projections(self) -> t.List[exp.Expression]: 2896 return self._parse_expressions() 2897 2898 def _parse_select( 2899 self, 2900 nested: bool = False, 2901 table: bool = False, 2902 parse_subquery_alias: bool = True, 2903 parse_set_operation: bool = True, 2904 ) -> t.Optional[exp.Expression]: 2905 cte = self._parse_with() 2906 2907 if cte: 2908 this = self._parse_statement() 2909 2910 if not this: 2911 self.raise_error("Failed to parse any statement following CTE") 2912 return cte 2913 2914 if "with" in this.arg_types: 2915 this.set("with", cte) 2916 else: 2917 self.raise_error(f"{this.key} does not support CTE") 2918 this = cte 2919 2920 return this 2921 2922 # duckdb supports leading with FROM x 2923 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2924 2925 if self._match(TokenType.SELECT): 2926 comments = self._prev_comments 2927 2928 hint = self._parse_hint() 2929 2930 if self._next and not self._next.token_type == TokenType.DOT: 2931 all_ = self._match(TokenType.ALL) 2932 distinct = self._match_set(self.DISTINCT_TOKENS) 2933 else: 2934 all_, distinct = None, None 2935 2936 kind = ( 2937 self._match(TokenType.ALIAS) 2938 and self._match_texts(("STRUCT", "VALUE")) 2939 and self._prev.text.upper() 2940 ) 2941 2942 if distinct: 2943 distinct = self.expression( 2944 exp.Distinct, 2945 on=self._parse_value() if self._match(TokenType.ON) else None, 2946 ) 2947 2948 if all_ and distinct: 2949 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2950 2951 limit = self._parse_limit(top=True) 2952 projections = self._parse_projections() 2953 2954 this = self.expression( 2955 exp.Select, 2956 kind=kind, 2957 hint=hint, 2958 distinct=distinct, 2959 expressions=projections, 2960 limit=limit, 2961 ) 2962 this.comments = comments 2963 2964 into = self._parse_into() 2965 if into: 2966 this.set("into", into) 2967 2968 if 
not from_: 2969 from_ = self._parse_from() 2970 2971 if from_: 2972 this.set("from", from_) 2973 2974 this = self._parse_query_modifiers(this) 2975 elif (table or nested) and self._match(TokenType.L_PAREN): 2976 if self._match(TokenType.PIVOT): 2977 this = self._parse_simplified_pivot() 2978 elif self._match(TokenType.FROM): 2979 this = exp.select("*").from_( 2980 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2981 ) 2982 else: 2983 this = ( 2984 self._parse_table() 2985 if table 2986 else self._parse_select(nested=True, parse_set_operation=False) 2987 ) 2988 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2989 2990 self._match_r_paren() 2991 2992 # We return early here so that the UNION isn't attached to the subquery by the 2993 # following call to _parse_set_operations, but instead becomes the parent node 2994 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2995 elif self._match(TokenType.VALUES, advance=False): 2996 this = self._parse_derived_table_values() 2997 elif from_: 2998 this = exp.select("*").from_(from_.this, copy=False) 2999 elif self._match(TokenType.SUMMARIZE): 3000 table = self._match(TokenType.TABLE) 3001 this = self._parse_select() or self._parse_string() or self._parse_table() 3002 return self.expression(exp.Summarize, this=this, table=table) 3003 elif self._match(TokenType.DESCRIBE): 3004 this = self._parse_describe() 3005 elif self._match_text_seq("STREAM"): 3006 this = self.expression(exp.Stream, this=self._parse_function()) 3007 else: 3008 this = None 3009 3010 return self._parse_set_operations(this) if parse_set_operation else this 3011 3012 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3013 if not skip_with_token and not self._match(TokenType.WITH): 3014 return None 3015 3016 comments = self._prev_comments 3017 recursive = self._match(TokenType.RECURSIVE) 3018 3019 expressions = [] 3020 while True: 3021 expressions.append(self._parse_cte()) 3022 3023 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3024 break 3025 else: 3026 self._match(TokenType.WITH) 3027 3028 return self.expression( 3029 exp.With, comments=comments, expressions=expressions, recursive=recursive 3030 ) 3031 3032 def _parse_cte(self) -> exp.CTE: 3033 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3034 if not alias or not alias.this: 3035 self.raise_error("Expected CTE to have alias") 3036 3037 self._match(TokenType.ALIAS) 3038 comments = self._prev_comments 3039 3040 if self._match_text_seq("NOT", "MATERIALIZED"): 3041 materialized = False 3042 elif self._match_text_seq("MATERIALIZED"): 3043 materialized = True 3044 else: 3045 materialized = None 3046 3047 return self.expression( 3048 exp.CTE, 3049 this=self._parse_wrapped(self._parse_statement), 3050 alias=alias, 3051 materialized=materialized, 3052 comments=comments, 3053 ) 3054 3055 def _parse_table_alias( 3056 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3057 ) -> t.Optional[exp.TableAlias]: 3058 any_token = self._match(TokenType.ALIAS) 3059 alias = ( 3060 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3061 or self._parse_string_as_identifier() 3062 ) 3063 3064 index = self._index 3065 if self._match(TokenType.L_PAREN): 3066 columns = self._parse_csv(self._parse_function_parameter) 3067 self._match_r_paren() if columns else self._retreat(index) 3068 else: 3069 columns = None 3070 3071 if not alias and not columns: 3072 return None 3073 3074 table_alias = 
self.expression(exp.TableAlias, this=alias, columns=columns) 3075 3076 # We bubble up comments from the Identifier to the TableAlias 3077 if isinstance(alias, exp.Identifier): 3078 table_alias.add_comments(alias.pop_comments()) 3079 3080 return table_alias 3081 3082 def _parse_subquery( 3083 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3084 ) -> t.Optional[exp.Subquery]: 3085 if not this: 3086 return None 3087 3088 return self.expression( 3089 exp.Subquery, 3090 this=this, 3091 pivots=self._parse_pivots(), 3092 alias=self._parse_table_alias() if parse_alias else None, 3093 sample=self._parse_table_sample(), 3094 ) 3095 3096 def _implicit_unnests_to_explicit(self, this: E) -> E: 3097 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3098 3099 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3100 for i, join in enumerate(this.args.get("joins") or []): 3101 table = join.this 3102 normalized_table = table.copy() 3103 normalized_table.meta["maybe_column"] = True 3104 normalized_table = _norm(normalized_table, dialect=self.dialect) 3105 3106 if isinstance(table, exp.Table) and not join.args.get("on"): 3107 if normalized_table.parts[0].name in refs: 3108 table_as_column = table.to_column() 3109 unnest = exp.Unnest(expressions=[table_as_column]) 3110 3111 # Table.to_column creates a parent Alias node that we want to convert to 3112 # a TableAlias and attach to the Unnest, so it matches the parser's output 3113 if isinstance(table.args.get("alias"), exp.TableAlias): 3114 table_as_column.replace(table_as_column.this) 3115 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3116 3117 table.replace(unnest) 3118 3119 refs.add(normalized_table.alias_or_name) 3120 3121 return this 3122 3123 def _parse_query_modifiers( 3124 self, this: t.Optional[exp.Expression] 3125 ) -> t.Optional[exp.Expression]: 3126 if isinstance(this, (exp.Query, exp.Table)): 3127 for join in self._parse_joins(): 3128 this.append("joins", join) 3129 for lateral in iter(self._parse_lateral, None): 3130 this.append("laterals", lateral) 3131 3132 while True: 3133 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3134 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3135 key, expression = parser(self) 3136 3137 if expression: 3138 this.set(key, expression) 3139 if key == "limit": 3140 offset = expression.args.pop("offset", None) 3141 3142 if offset: 3143 offset = exp.Offset(expression=offset) 3144 this.set("offset", offset) 3145 3146 limit_by_expressions = expression.expressions 3147 expression.set("expressions", None) 3148 offset.set("expressions", limit_by_expressions) 3149 continue 3150 break 3151 3152 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3153 this = self._implicit_unnests_to_explicit(this) 3154 3155 return this 3156 3157 def _parse_hint(self) -> t.Optional[exp.Hint]: 3158 if self._match(TokenType.HINT): 3159 hints = [] 3160 for hint in iter( 3161 lambda: self._parse_csv( 3162 lambda: self._parse_function() or self._parse_var(upper=True) 3163 ), 3164 [], 3165 ): 3166 hints.extend(hint) 3167 3168 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3169 self.raise_error("Expected */ after HINT") 3170 3171 return self.expression(exp.Hint, expressions=hints) 3172 3173 return None 3174 3175 def _parse_into(self) -> t.Optional[exp.Into]: 3176 if not self._match(TokenType.INTO): 3177 return None 3178 3179 temp = self._match(TokenType.TEMPORARY) 3180 unlogged = 
self._match_text_seq("UNLOGGED") 3181 self._match(TokenType.TABLE) 3182 3183 return self.expression( 3184 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3185 ) 3186 3187 def _parse_from( 3188 self, joins: bool = False, skip_from_token: bool = False 3189 ) -> t.Optional[exp.From]: 3190 if not skip_from_token and not self._match(TokenType.FROM): 3191 return None 3192 3193 return self.expression( 3194 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3195 ) 3196 3197 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3198 return self.expression( 3199 exp.MatchRecognizeMeasure, 3200 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3201 this=self._parse_expression(), 3202 ) 3203 3204 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3205 if not self._match(TokenType.MATCH_RECOGNIZE): 3206 return None 3207 3208 self._match_l_paren() 3209 3210 partition = self._parse_partition_by() 3211 order = self._parse_order() 3212 3213 measures = ( 3214 self._parse_csv(self._parse_match_recognize_measure) 3215 if self._match_text_seq("MEASURES") 3216 else None 3217 ) 3218 3219 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3220 rows = exp.var("ONE ROW PER MATCH") 3221 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3222 text = "ALL ROWS PER MATCH" 3223 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3224 text += " SHOW EMPTY MATCHES" 3225 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3226 text += " OMIT EMPTY MATCHES" 3227 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3228 text += " WITH UNMATCHED ROWS" 3229 rows = exp.var(text) 3230 else: 3231 rows = None 3232 3233 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3234 text = "AFTER MATCH SKIP" 3235 if self._match_text_seq("PAST", "LAST", "ROW"): 3236 text += " PAST LAST ROW" 3237 elif self._match_text_seq("TO", "NEXT", "ROW"): 3238 text += " TO NEXT ROW" 3239 elif self._match_text_seq("TO", "FIRST"): 3240 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3241 elif self._match_text_seq("TO", "LAST"): 3242 text += f" TO LAST {self._advance_any().text}" # type: ignore 3243 after = exp.var(text) 3244 else: 3245 after = None 3246 3247 if self._match_text_seq("PATTERN"): 3248 self._match_l_paren() 3249 3250 if not self._curr: 3251 self.raise_error("Expecting )", self._curr) 3252 3253 paren = 1 3254 start = self._curr 3255 3256 while self._curr and paren > 0: 3257 if self._curr.token_type == TokenType.L_PAREN: 3258 paren += 1 3259 if self._curr.token_type == TokenType.R_PAREN: 3260 paren -= 1 3261 3262 end = self._prev 3263 self._advance() 3264 3265 if paren > 0: 3266 self.raise_error("Expecting )", self._curr) 3267 3268 pattern = exp.var(self._find_sql(start, end)) 3269 else: 3270 pattern = None 3271 3272 define = ( 3273 self._parse_csv(self._parse_name_as_expression) 3274 if self._match_text_seq("DEFINE") 3275 else None 3276 ) 3277 3278 self._match_r_paren() 3279 3280 return self.expression( 3281 exp.MatchRecognize, 3282 partition_by=partition, 3283 order=order, 3284 measures=measures, 3285 rows=rows, 3286 after=after, 3287 pattern=pattern, 3288 define=define, 3289 alias=self._parse_table_alias(), 3290 ) 3291 3292 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3293 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3294 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3295 cross_apply = False 3296 3297 if cross_apply is not None: 
3298 this = self._parse_select(table=True) 3299 view = None 3300 outer = None 3301 elif self._match(TokenType.LATERAL): 3302 this = self._parse_select(table=True) 3303 view = self._match(TokenType.VIEW) 3304 outer = self._match(TokenType.OUTER) 3305 else: 3306 return None 3307 3308 if not this: 3309 this = ( 3310 self._parse_unnest() 3311 or self._parse_function() 3312 or self._parse_id_var(any_token=False) 3313 ) 3314 3315 while self._match(TokenType.DOT): 3316 this = exp.Dot( 3317 this=this, 3318 expression=self._parse_function() or self._parse_id_var(any_token=False), 3319 ) 3320 3321 if view: 3322 table = self._parse_id_var(any_token=False) 3323 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3324 table_alias: t.Optional[exp.TableAlias] = self.expression( 3325 exp.TableAlias, this=table, columns=columns 3326 ) 3327 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3328 # We move the alias from the lateral's child node to the lateral itself 3329 table_alias = this.args["alias"].pop() 3330 else: 3331 table_alias = self._parse_table_alias() 3332 3333 return self.expression( 3334 exp.Lateral, 3335 this=this, 3336 view=view, 3337 outer=outer, 3338 alias=table_alias, 3339 cross_apply=cross_apply, 3340 ) 3341 3342 def _parse_join_parts( 3343 self, 3344 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3345 return ( 3346 self._match_set(self.JOIN_METHODS) and self._prev, 3347 self._match_set(self.JOIN_SIDES) and self._prev, 3348 self._match_set(self.JOIN_KINDS) and self._prev, 3349 ) 3350 3351 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3352 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3353 this = self._parse_column() 3354 if isinstance(this, exp.Column): 3355 return this.this 3356 return this 3357 3358 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3359 3360 def _parse_join( 3361 self, skip_join_token: bool = False, parse_bracket: bool = False 3362 ) -> t.Optional[exp.Join]: 3363 if self._match(TokenType.COMMA): 3364 return self.expression(exp.Join, this=self._parse_table()) 3365 3366 index = self._index 3367 method, side, kind = self._parse_join_parts() 3368 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3369 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3370 3371 if not skip_join_token and not join: 3372 self._retreat(index) 3373 kind = None 3374 method = None 3375 side = None 3376 3377 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3378 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3379 3380 if not skip_join_token and not join and not outer_apply and not cross_apply: 3381 return None 3382 3383 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3384 3385 if method: 3386 kwargs["method"] = method.text 3387 if side: 3388 kwargs["side"] = side.text 3389 if kind: 3390 kwargs["kind"] = kind.text 3391 if hint: 3392 kwargs["hint"] = hint 3393 3394 if self._match(TokenType.MATCH_CONDITION): 3395 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3396 3397 if self._match(TokenType.ON): 3398 kwargs["on"] = self._parse_assignment() 3399 elif self._match(TokenType.USING): 3400 kwargs["using"] = self._parse_using_identifiers() 3401 elif ( 3402 not (outer_apply or cross_apply) 3403 and not isinstance(kwargs["this"], exp.Unnest) 3404 and not (kind and kind.token_type == TokenType.CROSS) 3405 
): 3406 index = self._index 3407 joins: t.Optional[list] = list(self._parse_joins()) 3408 3409 if joins and self._match(TokenType.ON): 3410 kwargs["on"] = self._parse_assignment() 3411 elif joins and self._match(TokenType.USING): 3412 kwargs["using"] = self._parse_using_identifiers() 3413 else: 3414 joins = None 3415 self._retreat(index) 3416 3417 kwargs["this"].set("joins", joins if joins else None) 3418 3419 comments = [c for token in (method, side, kind) if token for c in token.comments] 3420 return self.expression(exp.Join, comments=comments, **kwargs) 3421 3422 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3423 this = self._parse_assignment() 3424 3425 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3426 return this 3427 3428 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3429 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3430 3431 return this 3432 3433 def _parse_index_params(self) -> exp.IndexParameters: 3434 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3435 3436 if self._match(TokenType.L_PAREN, advance=False): 3437 columns = self._parse_wrapped_csv(self._parse_with_operator) 3438 else: 3439 columns = None 3440 3441 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3442 partition_by = self._parse_partition_by() 3443 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3444 tablespace = ( 3445 self._parse_var(any_token=True) 3446 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3447 else None 3448 ) 3449 where = self._parse_where() 3450 3451 on = self._parse_field() if self._match(TokenType.ON) else None 3452 3453 return self.expression( 3454 exp.IndexParameters, 3455 using=using, 3456 columns=columns, 3457 include=include, 3458 partition_by=partition_by, 3459 where=where, 3460 with_storage=with_storage, 3461 tablespace=tablespace, 3462 on=on, 3463 ) 3464 3465 def _parse_index( 3466 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3467 ) -> t.Optional[exp.Index]: 3468 if index or anonymous: 3469 unique = None 3470 primary = None 3471 amp = None 3472 3473 self._match(TokenType.ON) 3474 self._match(TokenType.TABLE) # hive 3475 table = self._parse_table_parts(schema=True) 3476 else: 3477 unique = self._match(TokenType.UNIQUE) 3478 primary = self._match_text_seq("PRIMARY") 3479 amp = self._match_text_seq("AMP") 3480 3481 if not self._match(TokenType.INDEX): 3482 return None 3483 3484 index = self._parse_id_var() 3485 table = None 3486 3487 params = self._parse_index_params() 3488 3489 return self.expression( 3490 exp.Index, 3491 this=index, 3492 table=table, 3493 unique=unique, 3494 primary=primary, 3495 amp=amp, 3496 params=params, 3497 ) 3498 3499 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3500 hints: t.List[exp.Expression] = [] 3501 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3502 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3503 hints.append( 3504 self.expression( 3505 exp.WithTableHint, 3506 expressions=self._parse_csv( 3507 lambda: self._parse_function() or self._parse_var(any_token=True) 3508 ), 3509 ) 3510 ) 3511 self._match_r_paren() 3512 else: 3513 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3514 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3515 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3516 3517 self._match_set((TokenType.INDEX, 
TokenType.KEY)) 3518 if self._match(TokenType.FOR): 3519 hint.set("target", self._advance_any() and self._prev.text.upper()) 3520 3521 hint.set("expressions", self._parse_wrapped_id_vars()) 3522 hints.append(hint) 3523 3524 return hints or None 3525 3526 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3527 return ( 3528 (not schema and self._parse_function(optional_parens=False)) 3529 or self._parse_id_var(any_token=False) 3530 or self._parse_string_as_identifier() 3531 or self._parse_placeholder() 3532 ) 3533 3534 def _parse_table_parts( 3535 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3536 ) -> exp.Table: 3537 catalog = None 3538 db = None 3539 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3540 3541 while self._match(TokenType.DOT): 3542 if catalog: 3543 # This allows nesting the table in arbitrarily many dot expressions if needed 3544 table = self.expression( 3545 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3546 ) 3547 else: 3548 catalog = db 3549 db = table 3550 # "" used for tsql FROM a..b case 3551 table = self._parse_table_part(schema=schema) or "" 3552 3553 if ( 3554 wildcard 3555 and self._is_connected() 3556 and (isinstance(table, exp.Identifier) or not table) 3557 and self._match(TokenType.STAR) 3558 ): 3559 if isinstance(table, exp.Identifier): 3560 table.args["this"] += "*" 3561 else: 3562 table = exp.Identifier(this="*") 3563 3564 # We bubble up comments from the Identifier to the Table 3565 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3566 3567 if is_db_reference: 3568 catalog = db 3569 db = table 3570 table = None 3571 3572 if not table and not is_db_reference: 3573 self.raise_error(f"Expected table name but got {self._curr}") 3574 if not db and is_db_reference: 3575 self.raise_error(f"Expected database name but got {self._curr}") 3576 3577 table = self.expression( 3578 exp.Table, 3579 comments=comments, 3580 this=table, 3581 db=db, 3582 catalog=catalog, 3583 ) 3584 3585 changes = self._parse_changes() 3586 if changes: 3587 table.set("changes", changes) 3588 3589 at_before = self._parse_historical_data() 3590 if at_before: 3591 table.set("when", at_before) 3592 3593 pivots = self._parse_pivots() 3594 if pivots: 3595 table.set("pivots", pivots) 3596 3597 return table 3598 3599 def _parse_table( 3600 self, 3601 schema: bool = False, 3602 joins: bool = False, 3603 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3604 parse_bracket: bool = False, 3605 is_db_reference: bool = False, 3606 parse_partition: bool = False, 3607 ) -> t.Optional[exp.Expression]: 3608 lateral = self._parse_lateral() 3609 if lateral: 3610 return lateral 3611 3612 unnest = self._parse_unnest() 3613 if unnest: 3614 return unnest 3615 3616 values = self._parse_derived_table_values() 3617 if values: 3618 return values 3619 3620 subquery = self._parse_select(table=True) 3621 if subquery: 3622 if not subquery.args.get("pivots"): 3623 subquery.set("pivots", self._parse_pivots()) 3624 return subquery 3625 3626 bracket = parse_bracket and self._parse_bracket(None) 3627 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3628 3629 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3630 self._parse_table 3631 ) 3632 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3633 3634 only = self._match(TokenType.ONLY) 3635 3636 this = t.cast( 3637 exp.Expression, 3638 
bracket 3639 or rows_from 3640 or self._parse_bracket( 3641 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3642 ), 3643 ) 3644 3645 if only: 3646 this.set("only", only) 3647 3648 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3649 self._match_text_seq("*") 3650 3651 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3652 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3653 this.set("partition", self._parse_partition()) 3654 3655 if schema: 3656 return self._parse_schema(this=this) 3657 3658 version = self._parse_version() 3659 3660 if version: 3661 this.set("version", version) 3662 3663 if self.dialect.ALIAS_POST_TABLESAMPLE: 3664 this.set("sample", self._parse_table_sample()) 3665 3666 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3667 if alias: 3668 this.set("alias", alias) 3669 3670 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3671 return self.expression( 3672 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3673 ) 3674 3675 this.set("hints", self._parse_table_hints()) 3676 3677 if not this.args.get("pivots"): 3678 this.set("pivots", self._parse_pivots()) 3679 3680 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3681 this.set("sample", self._parse_table_sample()) 3682 3683 if joins: 3684 for join in self._parse_joins(): 3685 this.append("joins", join) 3686 3687 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3688 this.set("ordinality", True) 3689 this.set("alias", self._parse_table_alias()) 3690 3691 return this 3692 3693 def _parse_version(self) -> t.Optional[exp.Version]: 3694 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3695 this = "TIMESTAMP" 3696 elif self._match(TokenType.VERSION_SNAPSHOT): 3697 this = "VERSION" 3698 else: 3699 return None 3700 3701 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3702 kind = self._prev.text.upper() 3703 start = self._parse_bitwise() 3704 self._match_texts(("TO", "AND")) 3705 end = self._parse_bitwise() 3706 expression: t.Optional[exp.Expression] = self.expression( 3707 exp.Tuple, expressions=[start, end] 3708 ) 3709 elif self._match_text_seq("CONTAINED", "IN"): 3710 kind = "CONTAINED IN" 3711 expression = self.expression( 3712 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3713 ) 3714 elif self._match(TokenType.ALL): 3715 kind = "ALL" 3716 expression = None 3717 else: 3718 self._match_text_seq("AS", "OF") 3719 kind = "AS OF" 3720 expression = self._parse_type() 3721 3722 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3723 3724 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3725 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3726 index = self._index 3727 historical_data = None 3728 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3729 this = self._prev.text.upper() 3730 kind = ( 3731 self._match(TokenType.L_PAREN) 3732 and self._match_texts(self.HISTORICAL_DATA_KIND) 3733 and self._prev.text.upper() 3734 ) 3735 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3736 3737 if expression: 3738 self._match_r_paren() 3739 historical_data = self.expression( 3740 exp.HistoricalData, this=this, kind=kind, expression=expression 3741 ) 3742 else: 3743 self._retreat(index) 3744 3745 return historical_data 3746 3747 def _parse_changes(self) -> t.Optional[exp.Changes]: 3748 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3749 return None 3750 3751 information = self._parse_var(any_token=True) 3752 self._match_r_paren() 3753 3754 return self.expression( 3755 exp.Changes, 3756 information=information, 3757 at_before=self._parse_historical_data(), 3758 end=self._parse_historical_data(), 3759 ) 3760 3761 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3762 if not self._match(TokenType.UNNEST): 3763 return None 3764 3765 expressions = self._parse_wrapped_csv(self._parse_equality) 3766 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3767 3768 alias = self._parse_table_alias() if with_alias else None 3769 3770 if alias: 3771 if self.dialect.UNNEST_COLUMN_ONLY: 3772 if alias.args.get("columns"): 3773 self.raise_error("Unexpected extra column alias in unnest.") 3774 3775 alias.set("columns", [alias.this]) 3776 alias.set("this", None) 3777 3778 columns = alias.args.get("columns") or [] 3779 if offset and len(expressions) < len(columns): 3780 offset = columns.pop() 3781 3782 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3783 self._match(TokenType.ALIAS) 3784 offset = self._parse_id_var( 3785 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3786 ) or exp.to_identifier("offset") 3787 3788 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3789 3790 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3791 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3792 if not is_derived and not ( 3793 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3794 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3795 ): 3796 return None 3797 3798 expressions = self._parse_csv(self._parse_value) 3799 alias = self._parse_table_alias() 3800 3801 if is_derived: 3802 self._match_r_paren() 3803 3804 return self.expression( 3805 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3806 ) 3807 3808 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3809 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3810 as_modifier and self._match_text_seq("USING", "SAMPLE") 3811 ): 3812 return None 3813 3814 bucket_numerator = None 3815 bucket_denominator = None 3816 bucket_field = None 3817 percent = None 3818 size = None 3819 seed = None 3820 3821 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3822 matched_l_paren = self._match(TokenType.L_PAREN) 3823 3824 if self.TABLESAMPLE_CSV: 3825 num = None 3826 expressions = self._parse_csv(self._parse_primary) 3827 else: 3828 expressions = None 3829 num = ( 3830 self._parse_factor() 3831 if self._match(TokenType.NUMBER, advance=False) 3832 else self._parse_primary() or self._parse_placeholder() 3833 ) 3834 3835 if self._match_text_seq("BUCKET"): 3836 bucket_numerator = self._parse_number() 3837 self._match_text_seq("OUT", "OF") 3838 bucket_denominator = self._parse_number() 3839 self._match(TokenType.ON) 3840 bucket_field = self._parse_field() 3841 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3842 percent = num 3843 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3844 size = num 3845 else: 3846 percent = num 3847 3848 if matched_l_paren: 3849 self._match_r_paren() 3850 3851 if self._match(TokenType.L_PAREN): 3852 method = self._parse_var(upper=True) 3853 seed = self._match(TokenType.COMMA) and self._parse_number() 3854 self._match_r_paren() 3855 elif self._match_texts(("SEED",
"REPEATABLE")): 3856 seed = self._parse_wrapped(self._parse_number) 3857 3858 if not method and self.DEFAULT_SAMPLING_METHOD: 3859 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3860 3861 return self.expression( 3862 exp.TableSample, 3863 expressions=expressions, 3864 method=method, 3865 bucket_numerator=bucket_numerator, 3866 bucket_denominator=bucket_denominator, 3867 bucket_field=bucket_field, 3868 percent=percent, 3869 size=size, 3870 seed=seed, 3871 ) 3872 3873 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3874 return list(iter(self._parse_pivot, None)) or None 3875 3876 def _parse_joins(self) -> t.Iterator[exp.Join]: 3877 return iter(self._parse_join, None) 3878 3879 # https://duckdb.org/docs/sql/statements/pivot 3880 def _parse_simplified_pivot(self) -> exp.Pivot: 3881 def _parse_on() -> t.Optional[exp.Expression]: 3882 this = self._parse_bitwise() 3883 return self._parse_in(this) if self._match(TokenType.IN) else this 3884 3885 this = self._parse_table() 3886 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3887 using = self._match(TokenType.USING) and self._parse_csv( 3888 lambda: self._parse_alias(self._parse_function()) 3889 ) 3890 group = self._parse_group() 3891 return self.expression( 3892 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3893 ) 3894 3895 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3896 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3897 this = self._parse_select_or_expression() 3898 3899 self._match(TokenType.ALIAS) 3900 alias = self._parse_bitwise() 3901 if alias: 3902 if isinstance(alias, exp.Column) and not alias.db: 3903 alias = alias.this 3904 return self.expression(exp.PivotAlias, this=this, alias=alias) 3905 3906 return this 3907 3908 value = self._parse_column() 3909 3910 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3911 self.raise_error("Expecting IN (") 3912 3913 if self._match(TokenType.ANY): 3914 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3915 else: 3916 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3917 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3918 3919 self._match_r_paren() 3920 return expr 3921 3922 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3923 index = self._index 3924 include_nulls = None 3925 3926 if self._match(TokenType.PIVOT): 3927 unpivot = False 3928 elif self._match(TokenType.UNPIVOT): 3929 unpivot = True 3930 3931 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3932 if self._match_text_seq("INCLUDE", "NULLS"): 3933 include_nulls = True 3934 elif self._match_text_seq("EXCLUDE", "NULLS"): 3935 include_nulls = False 3936 else: 3937 return None 3938 3939 expressions = [] 3940 3941 if not self._match(TokenType.L_PAREN): 3942 self._retreat(index) 3943 return None 3944 3945 if unpivot: 3946 expressions = self._parse_csv(self._parse_column) 3947 else: 3948 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3949 3950 if not expressions: 3951 self.raise_error("Failed to parse PIVOT's aggregation list") 3952 3953 if not self._match(TokenType.FOR): 3954 self.raise_error("Expecting FOR") 3955 3956 field = self._parse_pivot_in() 3957 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3958 self._parse_bitwise 3959 ) 3960 3961 self._match_r_paren() 3962 3963 pivot = self.expression( 3964 exp.Pivot, 3965 expressions=expressions, 3966 
field=field, 3967 unpivot=unpivot, 3968 include_nulls=include_nulls, 3969 default_on_null=default_on_null, 3970 ) 3971 3972 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3973 pivot.set("alias", self._parse_table_alias()) 3974 3975 if not unpivot: 3976 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3977 3978 columns: t.List[exp.Expression] = [] 3979 for fld in pivot.args["field"].expressions: 3980 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3981 for name in names: 3982 if self.PREFIXED_PIVOT_COLUMNS: 3983 name = f"{name}_{field_name}" if name else field_name 3984 else: 3985 name = f"{field_name}_{name}" if name else field_name 3986 3987 columns.append(exp.to_identifier(name)) 3988 3989 pivot.set("columns", columns) 3990 3991 return pivot 3992 3993 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3994 return [agg.alias for agg in aggregations] 3995 3996 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3997 if not skip_where_token and not self._match(TokenType.PREWHERE): 3998 return None 3999 4000 return self.expression( 4001 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4002 ) 4003 4004 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4005 if not skip_where_token and not self._match(TokenType.WHERE): 4006 return None 4007 4008 return self.expression( 4009 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4010 ) 4011 4012 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4013 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4014 return None 4015 4016 elements: t.Dict[str, t.Any] = defaultdict(list) 4017 4018 if self._match(TokenType.ALL): 4019 elements["all"] = True 4020 elif self._match(TokenType.DISTINCT): 4021 elements["all"] = False 4022 4023 while True: 4024 index = self._index 4025 4026 elements["expressions"].extend( 4027 self._parse_csv( 4028 lambda: None 4029 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4030 else self._parse_assignment() 4031 ) 4032 ) 4033 4034 before_with_index = self._index 4035 with_prefix = self._match(TokenType.WITH) 4036 4037 if self._match(TokenType.ROLLUP): 4038 elements["rollup"].append( 4039 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4040 ) 4041 elif self._match(TokenType.CUBE): 4042 elements["cube"].append( 4043 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4044 ) 4045 elif self._match(TokenType.GROUPING_SETS): 4046 elements["grouping_sets"].append( 4047 self.expression( 4048 exp.GroupingSets, 4049 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4050 ) 4051 ) 4052 elif self._match_text_seq("TOTALS"): 4053 elements["totals"] = True # type: ignore 4054 4055 if before_with_index <= self._index <= before_with_index + 1: 4056 self._retreat(before_with_index) 4057 break 4058 4059 if index == self._index: 4060 break 4061 4062 return self.expression(exp.Group, **elements) # type: ignore 4063 4064 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4065 return self.expression( 4066 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4067 ) 4068 4069 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4070 if self._match(TokenType.L_PAREN): 4071 grouping_set = self._parse_csv(self._parse_column) 4072 self._match_r_paren() 
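# Illustrative note (not from the source): each parenthesized set in, e.g.,
# GROUP BY GROUPING SETS ((a, b), (a), ()) takes this branch and is wrapped in an
# exp.Tuple below, while a bare column as in GROUPING SETS (a, b) falls through to
# the _parse_column() call at the end of this method.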
4073 return self.expression(exp.Tuple, expressions=grouping_set) 4074 4075 return self._parse_column() 4076 4077 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4078 if not skip_having_token and not self._match(TokenType.HAVING): 4079 return None 4080 return self.expression(exp.Having, this=self._parse_assignment()) 4081 4082 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4083 if not self._match(TokenType.QUALIFY): 4084 return None 4085 return self.expression(exp.Qualify, this=self._parse_assignment()) 4086 4087 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4088 if skip_start_token: 4089 start = None 4090 elif self._match(TokenType.START_WITH): 4091 start = self._parse_assignment() 4092 else: 4093 return None 4094 4095 self._match(TokenType.CONNECT_BY) 4096 nocycle = self._match_text_seq("NOCYCLE") 4097 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4098 exp.Prior, this=self._parse_bitwise() 4099 ) 4100 connect = self._parse_assignment() 4101 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4102 4103 if not start and self._match(TokenType.START_WITH): 4104 start = self._parse_assignment() 4105 4106 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4107 4108 def _parse_name_as_expression(self) -> exp.Alias: 4109 return self.expression( 4110 exp.Alias, 4111 alias=self._parse_id_var(any_token=True), 4112 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4113 ) 4114 4115 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4116 if self._match_text_seq("INTERPOLATE"): 4117 return self._parse_wrapped_csv(self._parse_name_as_expression) 4118 return None 4119 4120 def _parse_order( 4121 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4122 ) -> t.Optional[exp.Expression]: 4123 siblings = None 4124 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4125 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4126 return this 4127 4128 siblings = True 4129 4130 return self.expression( 4131 exp.Order, 4132 this=this, 4133 expressions=self._parse_csv(self._parse_ordered), 4134 siblings=siblings, 4135 ) 4136 4137 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4138 if not self._match(token): 4139 return None 4140 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4141 4142 def _parse_ordered( 4143 self, parse_method: t.Optional[t.Callable] = None 4144 ) -> t.Optional[exp.Ordered]: 4145 this = parse_method() if parse_method else self._parse_assignment() 4146 if not this: 4147 return None 4148 4149 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4150 this = exp.var("ALL") 4151 4152 asc = self._match(TokenType.ASC) 4153 desc = self._match(TokenType.DESC) or (asc and False) 4154 4155 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4156 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4157 4158 nulls_first = is_nulls_first or False 4159 explicitly_null_ordered = is_nulls_first or is_nulls_last 4160 4161 if ( 4162 not explicitly_null_ordered 4163 and ( 4164 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4165 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4166 ) 4167 and self.dialect.NULL_ORDERING != "nulls_are_last" 4168 ): 4169 nulls_first = True 4170 4171 if self._match_text_seq("WITH", "FILL"): 4172 with_fill = self.expression( 4173 exp.WithFill, 4174 **{ # type: ignore 4175 
"from": self._match(TokenType.FROM) and self._parse_bitwise(), 4176 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4177 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4178 "interpolate": self._parse_interpolate(), 4179 }, 4180 ) 4181 else: 4182 with_fill = None 4183 4184 return self.expression( 4185 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4186 ) 4187 4188 def _parse_limit( 4189 self, 4190 this: t.Optional[exp.Expression] = None, 4191 top: bool = False, 4192 skip_limit_token: bool = False, 4193 ) -> t.Optional[exp.Expression]: 4194 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4195 comments = self._prev_comments 4196 if top: 4197 limit_paren = self._match(TokenType.L_PAREN) 4198 expression = self._parse_term() if limit_paren else self._parse_number() 4199 4200 if limit_paren: 4201 self._match_r_paren() 4202 else: 4203 expression = self._parse_term() 4204 4205 if self._match(TokenType.COMMA): 4206 offset = expression 4207 expression = self._parse_term() 4208 else: 4209 offset = None 4210 4211 limit_exp = self.expression( 4212 exp.Limit, 4213 this=this, 4214 expression=expression, 4215 offset=offset, 4216 comments=comments, 4217 expressions=self._parse_limit_by(), 4218 ) 4219 4220 return limit_exp 4221 4222 if self._match(TokenType.FETCH): 4223 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4224 direction = self._prev.text.upper() if direction else "FIRST" 4225 4226 count = self._parse_field(tokens=self.FETCH_TOKENS) 4227 percent = self._match(TokenType.PERCENT) 4228 4229 self._match_set((TokenType.ROW, TokenType.ROWS)) 4230 4231 only = self._match_text_seq("ONLY") 4232 with_ties = self._match_text_seq("WITH", "TIES") 4233 4234 if only and with_ties: 4235 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4236 4237 return self.expression( 4238 exp.Fetch, 4239 direction=direction, 4240 count=count, 4241 percent=percent, 4242 with_ties=with_ties, 4243 ) 4244 4245 return this 4246 4247 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4248 if not self._match(TokenType.OFFSET): 4249 return this 4250 4251 count = self._parse_term() 4252 self._match_set((TokenType.ROW, TokenType.ROWS)) 4253 4254 return self.expression( 4255 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4256 ) 4257 4258 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4259 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4260 4261 def _parse_locks(self) -> t.List[exp.Lock]: 4262 locks = [] 4263 while True: 4264 if self._match_text_seq("FOR", "UPDATE"): 4265 update = True 4266 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4267 "LOCK", "IN", "SHARE", "MODE" 4268 ): 4269 update = False 4270 else: 4271 break 4272 4273 expressions = None 4274 if self._match_text_seq("OF"): 4275 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4276 4277 wait: t.Optional[bool | exp.Expression] = None 4278 if self._match_text_seq("NOWAIT"): 4279 wait = True 4280 elif self._match_text_seq("WAIT"): 4281 wait = self._parse_primary() 4282 elif self._match_text_seq("SKIP", "LOCKED"): 4283 wait = False 4284 4285 locks.append( 4286 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4287 ) 4288 4289 return locks 4290 4291 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4292 while this and 
self._match_set(self.SET_OPERATIONS): 4293 token_type = self._prev.token_type 4294 4295 if token_type == TokenType.UNION: 4296 operation: t.Type[exp.SetOperation] = exp.Union 4297 elif token_type == TokenType.EXCEPT: 4298 operation = exp.Except 4299 else: 4300 operation = exp.Intersect 4301 4302 comments = self._prev.comments 4303 4304 if self._match(TokenType.DISTINCT): 4305 distinct: t.Optional[bool] = True 4306 elif self._match(TokenType.ALL): 4307 distinct = False 4308 else: 4309 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4310 if distinct is None: 4311 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4312 4313 by_name = self._match_text_seq("BY", "NAME") 4314 expression = self._parse_select(nested=True, parse_set_operation=False) 4315 4316 this = self.expression( 4317 operation, 4318 comments=comments, 4319 this=this, 4320 distinct=distinct, 4321 by_name=by_name, 4322 expression=expression, 4323 ) 4324 4325 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4326 expression = this.expression 4327 4328 if expression: 4329 for arg in self.SET_OP_MODIFIERS: 4330 expr = expression.args.get(arg) 4331 if expr: 4332 this.set(arg, expr.pop()) 4333 4334 return this 4335 4336 def _parse_expression(self) -> t.Optional[exp.Expression]: 4337 return self._parse_alias(self._parse_assignment()) 4338 4339 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4340 this = self._parse_disjunction() 4341 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4342 # This allows us to parse <non-identifier token> := <expr> 4343 this = exp.column( 4344 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4345 ) 4346 4347 while self._match_set(self.ASSIGNMENT): 4348 this = self.expression( 4349 self.ASSIGNMENT[self._prev.token_type], 4350 this=this, 4351 comments=self._prev_comments, 4352 expression=self._parse_assignment(), 4353 ) 4354 4355 return this 4356 4357 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4358 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4359 4360 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4361 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4362 4363 def _parse_equality(self) -> t.Optional[exp.Expression]: 4364 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4365 4366 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4367 return self._parse_tokens(self._parse_range, self.COMPARISON) 4368 4369 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4370 this = this or self._parse_bitwise() 4371 negate = self._match(TokenType.NOT) 4372 4373 if self._match_set(self.RANGE_PARSERS): 4374 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4375 if not expression: 4376 return this 4377 4378 this = expression 4379 elif self._match(TokenType.ISNULL): 4380 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4381 4382 # Postgres supports ISNULL and NOTNULL for conditions. 
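# For example (illustrative, not from the source), WHERE x NOTNULL is parsed
# below into NOT (x IS NULL), just as WHERE x ISNULL becomes x IS NULL above.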
4383 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4384 if self._match(TokenType.NOTNULL): 4385 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4386 this = self.expression(exp.Not, this=this) 4387 4388 if negate: 4389 this = self._negate_range(this) 4390 4391 if self._match(TokenType.IS): 4392 this = self._parse_is(this) 4393 4394 return this 4395 4396 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4397 if not this: 4398 return this 4399 4400 return self.expression(exp.Not, this=this) 4401 4402 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4403 index = self._index - 1 4404 negate = self._match(TokenType.NOT) 4405 4406 if self._match_text_seq("DISTINCT", "FROM"): 4407 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4408 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4409 4410 if self._match(TokenType.JSON): 4411 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4412 4413 if self._match_text_seq("WITH"): 4414 _with = True 4415 elif self._match_text_seq("WITHOUT"): 4416 _with = False 4417 else: 4418 _with = None 4419 4420 unique = self._match(TokenType.UNIQUE) 4421 self._match_text_seq("KEYS") 4422 expression: t.Optional[exp.Expression] = self.expression( 4423 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4424 ) 4425 else: 4426 expression = self._parse_primary() or self._parse_null() 4427 if not expression: 4428 self._retreat(index) 4429 return None 4430 4431 this = self.expression(exp.Is, this=this, expression=expression) 4432 return self.expression(exp.Not, this=this) if negate else this 4433 4434 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4435 unnest = self._parse_unnest(with_alias=False) 4436 if unnest: 4437 this = self.expression(exp.In, this=this, unnest=unnest) 4438 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4439 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4440 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4441 4442 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4443 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4444 else: 4445 this = self.expression(exp.In, this=this, expressions=expressions) 4446 4447 if matched_l_paren: 4448 self._match_r_paren(this) 4449 elif not self._match(TokenType.R_BRACKET, expression=this): 4450 self.raise_error("Expecting ]") 4451 else: 4452 this = self.expression(exp.In, this=this, field=self._parse_field()) 4453 4454 return this 4455 4456 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4457 low = self._parse_bitwise() 4458 self._match(TokenType.AND) 4459 high = self._parse_bitwise() 4460 return self.expression(exp.Between, this=this, low=low, high=high) 4461 4462 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4463 if not self._match(TokenType.ESCAPE): 4464 return this 4465 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4466 4467 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4468 index = self._index 4469 4470 if not self._match(TokenType.INTERVAL) and match_interval: 4471 return None 4472 4473 if self._match(TokenType.STRING, advance=False): 4474 this = self._parse_primary() 4475 else: 4476 this = self._parse_term() 4477 4478 if not this 
or ( 4479 isinstance(this, exp.Column) 4480 and not this.table 4481 and not this.this.quoted 4482 and this.name.upper() == "IS" 4483 ): 4484 self._retreat(index) 4485 return None 4486 4487 unit = self._parse_function() or ( 4488 not self._match(TokenType.ALIAS, advance=False) 4489 and self._parse_var(any_token=True, upper=True) 4490 ) 4491 4492 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4493 # each INTERVAL expression into this canonical form so it's easy to transpile 4494 if this and this.is_number: 4495 this = exp.Literal.string(this.to_py()) 4496 elif this and this.is_string: 4497 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4498 if len(parts) == 1: 4499 if unit: 4500 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4501 self._retreat(self._index - 1) 4502 4503 this = exp.Literal.string(parts[0][0]) 4504 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4505 4506 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4507 unit = self.expression( 4508 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4509 ) 4510 4511 interval = self.expression(exp.Interval, this=this, unit=unit) 4512 4513 index = self._index 4514 self._match(TokenType.PLUS) 4515 4516 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4517 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4518 return self.expression( 4519 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4520 ) 4521 4522 self._retreat(index) 4523 return interval 4524 4525 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4526 this = self._parse_term() 4527 4528 while True: 4529 if self._match_set(self.BITWISE): 4530 this = self.expression( 4531 self.BITWISE[self._prev.token_type], 4532 this=this, 4533 expression=self._parse_term(), 4534 ) 4535 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4536 this = self.expression( 4537 exp.DPipe, 4538 this=this, 4539 expression=self._parse_term(), 4540 safe=not self.dialect.STRICT_STRING_CONCAT, 4541 ) 4542 elif self._match(TokenType.DQMARK): 4543 this = self.expression( 4544 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4545 ) 4546 elif self._match_pair(TokenType.LT, TokenType.LT): 4547 this = self.expression( 4548 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4549 ) 4550 elif self._match_pair(TokenType.GT, TokenType.GT): 4551 this = self.expression( 4552 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4553 ) 4554 else: 4555 break 4556 4557 return this 4558 4559 def _parse_term(self) -> t.Optional[exp.Expression]: 4560 this = self._parse_factor() 4561 4562 while self._match_set(self.TERM): 4563 klass = self.TERM[self._prev.token_type] 4564 comments = self._prev_comments 4565 expression = self._parse_factor() 4566 4567 this = self.expression(klass, this=this, comments=comments, expression=expression) 4568 4569 if isinstance(this, exp.Collate): 4570 expr = this.expression 4571 4572 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4573 # fallback to Identifier / Var 4574 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4575 ident = expr.this 4576 if isinstance(ident, exp.Identifier): 4577 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4578 4579 return this 4580 4581 def _parse_factor(self) -> t.Optional[exp.Expression]: 4582 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary 4583 this = parse_method() 4584 4585 while self._match_set(self.FACTOR): 4586 klass = self.FACTOR[self._prev.token_type] 4587 comments = self._prev_comments 4588 expression = parse_method() 4589 4590 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4591 self._retreat(self._index - 1) 4592 return this 4593 4594 this = self.expression(klass, this=this, comments=comments, expression=expression) 4595 4596 if isinstance(this, exp.Div): 4597 this.args["typed"] = self.dialect.TYPED_DIVISION 4598 this.args["safe"] = self.dialect.SAFE_DIVISION 4599 4600 return this 4601 4602 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4603 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4604 4605 def _parse_unary(self) -> t.Optional[exp.Expression]: 4606 if self._match_set(self.UNARY_PARSERS): 4607 return self.UNARY_PARSERS[self._prev.token_type](self) 4608 return self._parse_at_time_zone(self._parse_type()) 4609 4610 def _parse_type( 4611 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4612 ) -> t.Optional[exp.Expression]: 4613 interval = parse_interval and self._parse_interval() 4614 if interval: 4615 return interval 4616 4617 index = self._index 4618 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4619 4620 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4621 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4622 if isinstance(data_type, exp.Cast): 4623 # This constructor can contain ops directly after it, for instance struct unnesting: 4624 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4625 return self._parse_column_ops(data_type) 4626 4627 if data_type: 4628 index2 = self._index 4629 this = self._parse_primary() 4630 4631 if isinstance(this, exp.Literal): 4632 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4633 if parser: 4634 return parser(self, this, data_type) 4635 4636 return self.expression(exp.Cast, this=this, to=data_type) 4637 4638 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4639 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4640 # 4641 # If the index difference here is greater than 1, that means the parser itself must have 4642 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4643 # 4644 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4645 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4646 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4647 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4648 # 4649 # In these cases, we don't really want to return the converted type, but instead retreat 4650 # and try to parse a Column or Identifier in the section below.
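# Concrete example (illustrative, not from the source): for a bare Snowflake
# DECIMAL, a TYPE_CONVERTERS callable fills in the (38, 0) parameters without
# consuming extra tokens, so index2 - index == 1; we then skip the early return
# below, retreat, and try to parse a Column or Identifier instead.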
4651 if data_type.expressions and index2 - index > 1: 4652 self._retreat(index2) 4653 return self._parse_column_ops(data_type) 4654 4655 self._retreat(index) 4656 4657 if fallback_to_identifier: 4658 return self._parse_id_var() 4659 4660 this = self._parse_column() 4661 return this and self._parse_column_ops(this) 4662 4663 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4664 this = self._parse_type() 4665 if not this: 4666 return None 4667 4668 if isinstance(this, exp.Column) and not this.table: 4669 this = exp.var(this.name.upper()) 4670 4671 return self.expression( 4672 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4673 ) 4674 4675 def _parse_types( 4676 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4677 ) -> t.Optional[exp.Expression]: 4678 index = self._index 4679 4680 this: t.Optional[exp.Expression] = None 4681 prefix = self._match_text_seq("SYSUDTLIB", ".") 4682 4683 if not self._match_set(self.TYPE_TOKENS): 4684 identifier = allow_identifiers and self._parse_id_var( 4685 any_token=False, tokens=(TokenType.VAR,) 4686 ) 4687 if isinstance(identifier, exp.Identifier): 4688 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4689 4690 if len(tokens) != 1: 4691 self.raise_error("Unexpected identifier", self._prev) 4692 4693 if tokens[0].token_type in self.TYPE_TOKENS: 4694 self._prev = tokens[0] 4695 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4696 type_name = identifier.name 4697 4698 while self._match(TokenType.DOT): 4699 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4700 4701 this = exp.DataType.build(type_name, udt=True) 4702 else: 4703 self._retreat(self._index - 1) 4704 return None 4705 else: 4706 return None 4707 4708 type_token = self._prev.token_type 4709 4710 if type_token == TokenType.PSEUDO_TYPE: 4711 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4712 4713 if type_token == TokenType.OBJECT_IDENTIFIER: 4714 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4715 4716 # https://materialize.com/docs/sql/types/map/ 4717 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4718 key_type = self._parse_types( 4719 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4720 ) 4721 if not self._match(TokenType.FARROW): 4722 self._retreat(index) 4723 return None 4724 4725 value_type = self._parse_types( 4726 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4727 ) 4728 if not self._match(TokenType.R_BRACKET): 4729 self._retreat(index) 4730 return None 4731 4732 return exp.DataType( 4733 this=exp.DataType.Type.MAP, 4734 expressions=[key_type, value_type], 4735 nested=True, 4736 prefix=prefix, 4737 ) 4738 4739 nested = type_token in self.NESTED_TYPE_TOKENS 4740 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4741 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4742 expressions = None 4743 maybe_func = False 4744 4745 if self._match(TokenType.L_PAREN): 4746 if is_struct: 4747 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4748 elif nested: 4749 expressions = self._parse_csv( 4750 lambda: self._parse_types( 4751 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4752 ) 4753 ) 4754 elif type_token in self.ENUM_TYPE_TOKENS: 4755 expressions = self._parse_csv(self._parse_equality) 4756 elif is_aggregate: 4757 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4758 
any_token=False, tokens=(TokenType.VAR,) 4759 ) 4760 if not func_or_ident or not self._match(TokenType.COMMA): 4761 return None 4762 expressions = self._parse_csv( 4763 lambda: self._parse_types( 4764 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4765 ) 4766 ) 4767 expressions.insert(0, func_or_ident) 4768 else: 4769 expressions = self._parse_csv(self._parse_type_size) 4770 4771 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4772 if type_token == TokenType.VECTOR and len(expressions) == 2: 4773 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4774 4775 if not expressions or not self._match(TokenType.R_PAREN): 4776 self._retreat(index) 4777 return None 4778 4779 maybe_func = True 4780 4781 values: t.Optional[t.List[exp.Expression]] = None 4782 4783 if nested and self._match(TokenType.LT): 4784 if is_struct: 4785 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4786 else: 4787 expressions = self._parse_csv( 4788 lambda: self._parse_types( 4789 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4790 ) 4791 ) 4792 4793 if not self._match(TokenType.GT): 4794 self.raise_error("Expecting >") 4795 4796 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4797 values = self._parse_csv(self._parse_assignment) 4798 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4799 4800 if type_token in self.TIMESTAMPS: 4801 if self._match_text_seq("WITH", "TIME", "ZONE"): 4802 maybe_func = False 4803 tz_type = ( 4804 exp.DataType.Type.TIMETZ 4805 if type_token in self.TIMES 4806 else exp.DataType.Type.TIMESTAMPTZ 4807 ) 4808 this = exp.DataType(this=tz_type, expressions=expressions) 4809 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4810 maybe_func = False 4811 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4812 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4813 maybe_func = False 4814 elif type_token == TokenType.INTERVAL: 4815 unit = self._parse_var(upper=True) 4816 if unit: 4817 if self._match_text_seq("TO"): 4818 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4819 4820 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4821 else: 4822 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4823 4824 if maybe_func and check_func: 4825 index2 = self._index 4826 peek = self._parse_string() 4827 4828 if not peek: 4829 self._retreat(index) 4830 return None 4831 4832 self._retreat(index2) 4833 4834 if not this: 4835 if self._match_text_seq("UNSIGNED"): 4836 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4837 if not unsigned_type_token: 4838 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4839 4840 type_token = unsigned_type_token or type_token 4841 4842 this = exp.DataType( 4843 this=exp.DataType.Type[type_token.value], 4844 expressions=expressions, 4845 nested=nested, 4846 prefix=prefix, 4847 ) 4848 4849 # Empty arrays/structs are allowed 4850 if values is not None: 4851 cls = exp.Struct if is_struct else exp.Array 4852 this = exp.cast(cls(expressions=values), this, copy=False) 4853 4854 elif expressions: 4855 this.set("expressions", expressions) 4856 4857 # https://materialize.com/docs/sql/types/list/#type-name 4858 while self._match(TokenType.LIST): 4859 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4860 4861 index = self._index 4862 4863 # Postgres 
supports the INT ARRAY[3] syntax as a synonym for INT[3] 4864 matched_array = self._match(TokenType.ARRAY) 4865 4866 while self._curr: 4867 datatype_token = self._prev.token_type 4868 matched_l_bracket = self._match(TokenType.L_BRACKET) 4869 if not matched_l_bracket and not matched_array: 4870 break 4871 4872 matched_array = False 4873 values = self._parse_csv(self._parse_assignment) or None 4874 if ( 4875 values 4876 and not schema 4877 and ( 4878 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4879 ) 4880 ): 4881 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4882 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4883 self._retreat(index) 4884 break 4885 4886 this = exp.DataType( 4887 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4888 ) 4889 self._match(TokenType.R_BRACKET) 4890 4891 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4892 converter = self.TYPE_CONVERTERS.get(this.this) 4893 if converter: 4894 this = converter(t.cast(exp.DataType, this)) 4895 4896 return this 4897 4898 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4899 index = self._index 4900 4901 if ( 4902 self._curr 4903 and self._next 4904 and self._curr.token_type in self.TYPE_TOKENS 4905 and self._next.token_type in self.TYPE_TOKENS 4906 ): 4907 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4908 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4909 this = self._parse_id_var() 4910 else: 4911 this = ( 4912 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4913 or self._parse_id_var() 4914 ) 4915 4916 self._match(TokenType.COLON) 4917 4918 if ( 4919 type_required 4920 and not isinstance(this, exp.DataType) 4921 and not self._match_set(self.TYPE_TOKENS, advance=False) 4922 ): 4923 self._retreat(index) 4924 return self._parse_types() 4925 4926 return self._parse_column_def(this) 4927 4928 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4929 if not self._match_text_seq("AT", "TIME", "ZONE"): 4930 return this 4931 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4932 4933 def _parse_column(self) -> t.Optional[exp.Expression]: 4934 this = self._parse_column_reference() 4935 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4936 4937 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4938 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4939 4940 return column 4941 4942 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4943 this = self._parse_field() 4944 if ( 4945 not this 4946 and self._match(TokenType.VALUES, advance=False) 4947 and self.VALUES_FOLLOWED_BY_PAREN 4948 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4949 ): 4950 this = self._parse_id_var() 4951 4952 if isinstance(this, exp.Identifier): 4953 # We bubble up comments from the Identifier to the Column 4954 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4955 4956 return this 4957 4958 def _parse_colon_as_variant_extract( 4959 self, this: t.Optional[exp.Expression] 4960 ) -> t.Optional[exp.Expression]: 4961 casts = [] 4962 json_path = [] 4963 4964 while self._match(TokenType.COLON): 4965 start_index = self._index 4966 
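# Illustrative walk-through (not from the source): for Snowflake input like
# col:a.b::INT, each `:`-delimited segment lands here; the trailing ::INT is
# peeled off into `casts` below, so only the path "a.b" is appended to json_path.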
4967 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT when any_token=True 4968 path = self._parse_column_ops( 4969 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4970 ) 4971 4972 # The cast :: operator has a lower precedence than the extraction operator :, so 4973 # we rearrange the AST appropriately to avoid casting the JSON path 4974 while isinstance(path, exp.Cast): 4975 casts.append(path.to) 4976 path = path.this 4977 4978 if casts: 4979 dcolon_offset = next( 4980 i 4981 for i, token in enumerate(self._tokens[start_index:]) 4982 if token.token_type == TokenType.DCOLON 4983 ) 4984 end_token = self._tokens[start_index + dcolon_offset - 1] 4985 else: 4986 end_token = self._prev 4987 4988 if path: 4989 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4990 4991 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4992 # Databricks transforms it back to the colon/dot notation 4993 if json_path: 4994 this = self.expression( 4995 exp.JSONExtract, 4996 this=this, 4997 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4998 variant_extract=True, 4999 ) 5000 5001 while casts: 5002 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5003 5004 return this 5005 5006 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5007 return self._parse_types() 5008 5009 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5010 this = self._parse_bracket(this) 5011 5012 while self._match_set(self.COLUMN_OPERATORS): 5013 op_token = self._prev.token_type 5014 op = self.COLUMN_OPERATORS.get(op_token) 5015 5016 if op_token == TokenType.DCOLON: 5017 field = self._parse_dcolon() 5018 if not field: 5019 self.raise_error("Expected type") 5020 elif op and self._curr: 5021 field = self._parse_column_reference() 5022 else: 5023 field = self._parse_field(any_token=True, anonymous_func=True) 5024 5025 if isinstance(field, exp.Func) and this: 5026 # bigquery allows function calls like x.y.count(...) 5027 # SAFE.SUBSTR(...)
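# (Illustrative, not from the source: for a dotted call such as a.b.count(),
# the replace_tree call below re-roots the column chain into exp.Dot nodes so
# that the trailing function applies to the full a.b prefix.)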
5028 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5029 this = exp.replace_tree( 5030 this, 5031 lambda n: ( 5032 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5033 if n.table 5034 else n.this 5035 ) 5036 if isinstance(n, exp.Column) 5037 else n, 5038 ) 5039 5040 if op: 5041 this = op(self, this, field) 5042 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5043 this = self.expression( 5044 exp.Column, 5045 this=field, 5046 table=this.this, 5047 db=this.args.get("table"), 5048 catalog=this.args.get("db"), 5049 ) 5050 else: 5051 this = self.expression(exp.Dot, this=this, expression=field) 5052 5053 this = self._parse_bracket(this) 5054 5055 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5056 5057 def _parse_primary(self) -> t.Optional[exp.Expression]: 5058 if self._match_set(self.PRIMARY_PARSERS): 5059 token_type = self._prev.token_type 5060 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5061 5062 if token_type == TokenType.STRING: 5063 expressions = [primary] 5064 while self._match(TokenType.STRING): 5065 expressions.append(exp.Literal.string(self._prev.text)) 5066 5067 if len(expressions) > 1: 5068 return self.expression(exp.Concat, expressions=expressions) 5069 5070 return primary 5071 5072 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5073 return exp.Literal.number(f"0.{self._prev.text}") 5074 5075 if self._match(TokenType.L_PAREN): 5076 comments = self._prev_comments 5077 query = self._parse_select() 5078 5079 if query: 5080 expressions = [query] 5081 else: 5082 expressions = self._parse_expressions() 5083 5084 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5085 5086 if not this and self._match(TokenType.R_PAREN, advance=False): 5087 this = self.expression(exp.Tuple) 5088 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5089 this = self._parse_subquery(this=this, parse_alias=False) 5090 elif isinstance(this, exp.Subquery): 5091 this = self._parse_subquery( 5092 this=self._parse_set_operations(this), parse_alias=False 5093 ) 5094 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5095 this = self.expression(exp.Tuple, expressions=expressions) 5096 else: 5097 this = self.expression(exp.Paren, this=this) 5098 5099 if this: 5100 this.add_comments(comments) 5101 5102 self._match_r_paren(expression=this) 5103 return this 5104 5105 return None 5106 5107 def _parse_field( 5108 self, 5109 any_token: bool = False, 5110 tokens: t.Optional[t.Collection[TokenType]] = None, 5111 anonymous_func: bool = False, 5112 ) -> t.Optional[exp.Expression]: 5113 if anonymous_func: 5114 field = ( 5115 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5116 or self._parse_primary() 5117 ) 5118 else: 5119 field = self._parse_primary() or self._parse_function( 5120 anonymous=anonymous_func, any_token=any_token 5121 ) 5122 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5123 5124 def _parse_function( 5125 self, 5126 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5127 anonymous: bool = False, 5128 optional_parens: bool = True, 5129 any_token: bool = False, 5130 ) -> t.Optional[exp.Expression]: 5131 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5132 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5133 fn_syntax = False 5134 if ( 5135 self._match(TokenType.L_BRACE, advance=False) 5136 and self._next 5137 and 
self._next.text.upper() == "FN" 5138 ): 5139 self._advance(2) 5140 fn_syntax = True 5141 5142 func = self._parse_function_call( 5143 functions=functions, 5144 anonymous=anonymous, 5145 optional_parens=optional_parens, 5146 any_token=any_token, 5147 ) 5148 5149 if fn_syntax: 5150 self._match(TokenType.R_BRACE) 5151 5152 return func 5153 5154 def _parse_function_call( 5155 self, 5156 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5157 anonymous: bool = False, 5158 optional_parens: bool = True, 5159 any_token: bool = False, 5160 ) -> t.Optional[exp.Expression]: 5161 if not self._curr: 5162 return None 5163 5164 comments = self._curr.comments 5165 token_type = self._curr.token_type 5166 this = self._curr.text 5167 upper = this.upper() 5168 5169 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5170 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5171 self._advance() 5172 return self._parse_window(parser(self)) 5173 5174 if not self._next or self._next.token_type != TokenType.L_PAREN: 5175 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5176 self._advance() 5177 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5178 5179 return None 5180 5181 if any_token: 5182 if token_type in self.RESERVED_TOKENS: 5183 return None 5184 elif token_type not in self.FUNC_TOKENS: 5185 return None 5186 5187 self._advance(2) 5188 5189 parser = self.FUNCTION_PARSERS.get(upper) 5190 if parser and not anonymous: 5191 this = parser(self) 5192 else: 5193 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5194 5195 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5196 this = self.expression(subquery_predicate, this=self._parse_select()) 5197 self._match_r_paren() 5198 return this 5199 5200 if functions is None: 5201 functions = self.FUNCTIONS 5202 5203 function = functions.get(upper) 5204 5205 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5206 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5207 5208 if alias: 5209 args = self._kv_to_prop_eq(args) 5210 5211 if function and not anonymous: 5212 if "dialect" in function.__code__.co_varnames: 5213 func = function(args, dialect=self.dialect) 5214 else: 5215 func = function(args) 5216 5217 func = self.validate_expression(func, args) 5218 if not self.dialect.NORMALIZE_FUNCTIONS: 5219 func.meta["name"] = this 5220 5221 this = func 5222 else: 5223 if token_type == TokenType.IDENTIFIER: 5224 this = exp.Identifier(this=this, quoted=True) 5225 this = self.expression(exp.Anonymous, this=this, expressions=args) 5226 5227 if isinstance(this, exp.Expression): 5228 this.add_comments(comments) 5229 5230 self._match_r_paren(this) 5231 return self._parse_window(this) 5232 5233 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5234 return expression 5235 5236 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5237 transformed = [] 5238 5239 for index, e in enumerate(expressions): 5240 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5241 if isinstance(e, exp.Alias): 5242 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5243 5244 if not isinstance(e, exp.PropertyEQ): 5245 e = self.expression( 5246 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5247 ) 5248 5249 if isinstance(e.this, exp.Column): 5250 e.this.replace(e.this.this) 5251 else: 5252 e = self._to_prop_eq(e, index) 5253 5254 transformed.append(e) 5255 5256 return transformed 
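# Illustrative usage sketch (not part of the original module): the function-call
# machinery above is normally reached through the public API. Assuming only the
# documented entry point sqlglot.parse_one, something like
#
#   import sqlglot
#   ast = sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}")
#
# routes through _parse_function(), which strips the ODBC-style {fn ...} wrapper,
# and then _parse_function_call(), which resolves CONCAT via self.FUNCTIONS.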
5257 5258 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5259 return self._parse_column_def(self._parse_id_var()) 5260 5261 def _parse_user_defined_function( 5262 self, kind: t.Optional[TokenType] = None 5263 ) -> t.Optional[exp.Expression]: 5264 this = self._parse_id_var() 5265 5266 while self._match(TokenType.DOT): 5267 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5268 5269 if not self._match(TokenType.L_PAREN): 5270 return this 5271 5272 expressions = self._parse_csv(self._parse_function_parameter) 5273 self._match_r_paren() 5274 return self.expression( 5275 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5276 ) 5277 5278 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5279 literal = self._parse_primary() 5280 if literal: 5281 return self.expression(exp.Introducer, this=token.text, expression=literal) 5282 5283 return self.expression(exp.Identifier, this=token.text) 5284 5285 def _parse_session_parameter(self) -> exp.SessionParameter: 5286 kind = None 5287 this = self._parse_id_var() or self._parse_primary() 5288 5289 if this and self._match(TokenType.DOT): 5290 kind = this.name 5291 this = self._parse_var() or self._parse_primary() 5292 5293 return self.expression(exp.SessionParameter, this=this, kind=kind) 5294 5295 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5296 return self._parse_id_var() 5297 5298 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5299 index = self._index 5300 5301 if self._match(TokenType.L_PAREN): 5302 expressions = t.cast( 5303 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5304 ) 5305 5306 if not self._match(TokenType.R_PAREN): 5307 self._retreat(index) 5308 else: 5309 expressions = [self._parse_lambda_arg()] 5310 5311 if self._match_set(self.LAMBDAS): 5312 return self.LAMBDAS[self._prev.token_type](self, expressions) 5313 5314 self._retreat(index) 5315 5316 this: t.Optional[exp.Expression] 5317 5318 if self._match(TokenType.DISTINCT): 5319 this = self.expression( 5320 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5321 ) 5322 else: 5323 this = self._parse_select_or_expression(alias=alias) 5324 5325 return self._parse_limit( 5326 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5327 ) 5328 5329 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5330 index = self._index 5331 if not self._match(TokenType.L_PAREN): 5332 return this 5333 5334 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5335 # expr can be of both types 5336 if self._match_set(self.SELECT_START_TOKENS): 5337 self._retreat(index) 5338 return this 5339 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5340 self._match_r_paren() 5341 return self.expression(exp.Schema, this=this, expressions=args) 5342 5343 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5344 return self._parse_column_def(self._parse_field(any_token=True)) 5345 5346 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5347 # column defs are not really columns, they're identifiers 5348 if isinstance(this, exp.Column): 5349 this = this.this 5350 5351 kind = self._parse_types(schema=True) 5352 5353 if self._match_text_seq("FOR", "ORDINALITY"): 5354 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5355 5356 constraints: t.List[exp.Expression] = [] 5357 5358 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5359 ("ALIAS", "MATERIALIZED") 5360 ): 5361 persisted = self._prev.text.upper() == "MATERIALIZED" 5362 constraint_kind = exp.ComputedColumnConstraint( 5363 this=self._parse_assignment(), 5364 persisted=persisted or self._match_text_seq("PERSISTED"), 5365 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5366 ) 5367 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5368 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5369 self._match(TokenType.ALIAS) 5370 constraints.append( 5371 self.expression( 5372 exp.ColumnConstraint, 5373 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5374 ) 5375 ) 5376 5377 while True: 5378 constraint = self._parse_column_constraint() 5379 if not constraint: 5380 break 5381 constraints.append(constraint) 5382 5383 if not kind and not constraints: 5384 return this 5385 5386 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5387 5388 def _parse_auto_increment( 5389 self, 5390 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5391 start = None 5392 increment = None 5393 5394 if self._match(TokenType.L_PAREN, advance=False): 5395 args = self._parse_wrapped_csv(self._parse_bitwise) 5396 start = seq_get(args, 0) 5397 increment = seq_get(args, 1) 5398 elif self._match_text_seq("START"): 5399 start = self._parse_bitwise() 5400 self._match_text_seq("INCREMENT") 5401 increment = self._parse_bitwise() 5402 5403 if start and increment: 5404 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5405 5406 return exp.AutoIncrementColumnConstraint() 5407 5408 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5409 if not self._match_text_seq("REFRESH"): 5410 self._retreat(self._index - 1) 5411 return None 5412 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5413 5414 def _parse_compress(self) -> exp.CompressColumnConstraint: 5415 if self._match(TokenType.L_PAREN, advance=False): 5416 return self.expression( 5417 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5418 ) 5419 5420 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5421 5422 def _parse_generated_as_identity( 5423 self, 5424 ) -> ( 5425 exp.GeneratedAsIdentityColumnConstraint 5426 | exp.ComputedColumnConstraint 5427 | exp.GeneratedAsRowColumnConstraint 5428 ): 5429 if self._match_text_seq("BY", "DEFAULT"): 5430 on_null = 
self._match_pair(TokenType.ON, TokenType.NULL) 5431 this = self.expression( 5432 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5433 ) 5434 else: 5435 self._match_text_seq("ALWAYS") 5436 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5437 5438 self._match(TokenType.ALIAS) 5439 5440 if self._match_text_seq("ROW"): 5441 start = self._match_text_seq("START") 5442 if not start: 5443 self._match(TokenType.END) 5444 hidden = self._match_text_seq("HIDDEN") 5445 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5446 5447 identity = self._match_text_seq("IDENTITY") 5448 5449 if self._match(TokenType.L_PAREN): 5450 if self._match(TokenType.START_WITH): 5451 this.set("start", self._parse_bitwise()) 5452 if self._match_text_seq("INCREMENT", "BY"): 5453 this.set("increment", self._parse_bitwise()) 5454 if self._match_text_seq("MINVALUE"): 5455 this.set("minvalue", self._parse_bitwise()) 5456 if self._match_text_seq("MAXVALUE"): 5457 this.set("maxvalue", self._parse_bitwise()) 5458 5459 if self._match_text_seq("CYCLE"): 5460 this.set("cycle", True) 5461 elif self._match_text_seq("NO", "CYCLE"): 5462 this.set("cycle", False) 5463 5464 if not identity: 5465 this.set("expression", self._parse_range()) 5466 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5467 args = self._parse_csv(self._parse_bitwise) 5468 this.set("start", seq_get(args, 0)) 5469 this.set("increment", seq_get(args, 1)) 5470 5471 self._match_r_paren() 5472 5473 return this 5474 5475 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5476 self._match_text_seq("LENGTH") 5477 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5478 5479 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5480 if self._match_text_seq("NULL"): 5481 return self.expression(exp.NotNullColumnConstraint) 5482 if self._match_text_seq("CASESPECIFIC"): 5483 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5484 if self._match_text_seq("FOR", "REPLICATION"): 5485 return self.expression(exp.NotForReplicationColumnConstraint) 5486 5487 # Unconsume the `NOT` token 5488 self._retreat(self._index - 1) 5489 return None 5490 5491 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5492 if self._match(TokenType.CONSTRAINT): 5493 this = self._parse_id_var() 5494 else: 5495 this = None 5496 5497 if self._match_texts(self.CONSTRAINT_PARSERS): 5498 return self.expression( 5499 exp.ColumnConstraint, 5500 this=this, 5501 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5502 ) 5503 5504 return this 5505 5506 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5507 if not self._match(TokenType.CONSTRAINT): 5508 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5509 5510 return self.expression( 5511 exp.Constraint, 5512 this=self._parse_id_var(), 5513 expressions=self._parse_unnamed_constraints(), 5514 ) 5515 5516 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5517 constraints = [] 5518 while True: 5519 constraint = self._parse_unnamed_constraint() or self._parse_function() 5520 if not constraint: 5521 break 5522 constraints.append(constraint) 5523 5524 return constraints 5525 5526 def _parse_unnamed_constraint( 5527 self, constraints: t.Optional[t.Collection[str]] = None 5528 ) -> t.Optional[exp.Expression]: 5529 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5530 constraints or 
self.CONSTRAINT_PARSERS 5531 ): 5532 return None 5533 5534 constraint = self._prev.text.upper() 5535 if constraint not in self.CONSTRAINT_PARSERS: 5536 self.raise_error(f"No parser found for schema constraint {constraint}.") 5537 5538 return self.CONSTRAINT_PARSERS[constraint](self) 5539 5540 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5541 return self._parse_id_var(any_token=False) 5542 5543 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5544 self._match_text_seq("KEY") 5545 return self.expression( 5546 exp.UniqueColumnConstraint, 5547 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5548 this=self._parse_schema(self._parse_unique_key()), 5549 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5550 on_conflict=self._parse_on_conflict(), 5551 ) 5552 5553 def _parse_key_constraint_options(self) -> t.List[str]: 5554 options = [] 5555 while True: 5556 if not self._curr: 5557 break 5558 5559 if self._match(TokenType.ON): 5560 action = None 5561 on = self._advance_any() and self._prev.text 5562 5563 if self._match_text_seq("NO", "ACTION"): 5564 action = "NO ACTION" 5565 elif self._match_text_seq("CASCADE"): 5566 action = "CASCADE" 5567 elif self._match_text_seq("RESTRICT"): 5568 action = "RESTRICT" 5569 elif self._match_pair(TokenType.SET, TokenType.NULL): 5570 action = "SET NULL" 5571 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5572 action = "SET DEFAULT" 5573 else: 5574 self.raise_error("Invalid key constraint") 5575 5576 options.append(f"ON {on} {action}") 5577 else: 5578 var = self._parse_var_from_options( 5579 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5580 ) 5581 if not var: 5582 break 5583 options.append(var.name) 5584 5585 return options 5586 5587 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5588 if match and not self._match(TokenType.REFERENCES): 5589 return None 5590 5591 expressions = None 5592 this = self._parse_table(schema=True) 5593 options = self._parse_key_constraint_options() 5594 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5595 5596 def _parse_foreign_key(self) -> exp.ForeignKey: 5597 expressions = self._parse_wrapped_id_vars() 5598 reference = self._parse_references() 5599 options = {} 5600 5601 while self._match(TokenType.ON): 5602 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5603 self.raise_error("Expected DELETE or UPDATE") 5604 5605 kind = self._prev.text.lower() 5606 5607 if self._match_text_seq("NO", "ACTION"): 5608 action = "NO ACTION" 5609 elif self._match(TokenType.SET): 5610 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5611 action = "SET " + self._prev.text.upper() 5612 else: 5613 self._advance() 5614 action = self._prev.text.upper() 5615 5616 options[kind] = action 5617 5618 return self.expression( 5619 exp.ForeignKey, 5620 expressions=expressions, 5621 reference=reference, 5622 **options, # type: ignore 5623 ) 5624 5625 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5626 return self._parse_field() 5627 5628 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5629 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5630 self._retreat(self._index - 1) 5631 return None 5632 5633 id_vars = self._parse_wrapped_id_vars() 5634 return self.expression( 5635 exp.PeriodForSystemTimeConstraint, 5636 this=seq_get(id_vars, 0), 5637 expression=seq_get(id_vars, 1), 5638 ) 5639 5640 def _parse_primary_key( 5641 self, wrapped_optional: bool 
= False, in_props: bool = False 5642 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5643 desc = ( 5644 self._match_set((TokenType.ASC, TokenType.DESC)) 5645 and self._prev.token_type == TokenType.DESC 5646 ) 5647 5648 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5649 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5650 5651 expressions = self._parse_wrapped_csv( 5652 self._parse_primary_key_part, optional=wrapped_optional 5653 ) 5654 options = self._parse_key_constraint_options() 5655 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5656 5657 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5658 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5659 5660 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5661 """ 5662 Parses a datetime literal in ODBC format. We parse the literal into the corresponding 5663 type; for example, `{d'yyyy-mm-dd'}` is parsed as a `Date` expression, exactly as 5664 `DATE('yyyy-mm-dd')` would be. 5665 5666 Reference: 5667 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5668 """ 5669 self._match(TokenType.VAR) 5670 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5671 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5672 if not self._match(TokenType.R_BRACE): 5673 self.raise_error("Expected }") 5674 return expression 5675 5676 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5677 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5678 return this 5679 5680 bracket_kind = self._prev.token_type 5681 if ( 5682 bracket_kind == TokenType.L_BRACE 5683 and self._curr 5684 and self._curr.token_type == TokenType.VAR 5685 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5686 ): 5687 return self._parse_odbc_datetime_literal() 5688 5689 expressions = self._parse_csv( 5690 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5691 ) 5692 5693 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5694 self.raise_error("Expected ]") 5695 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5696 self.raise_error("Expected }") 5697 5698 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5699 if bracket_kind == TokenType.L_BRACE: 5700 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5701 elif not this: 5702 this = build_array_constructor( 5703 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5704 ) 5705 else: 5706 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5707 if constructor_type: 5708 return build_array_constructor( 5709 constructor_type, 5710 args=expressions, 5711 bracket_kind=bracket_kind, 5712 dialect=self.dialect, 5713 ) 5714 5715 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5716 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5717 5718 self._add_comments(this) 5719 return self._parse_bracket(this) 5720 5721 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5722 if self._match(TokenType.COLON): 5723 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5724 return this 5725 5726 def _parse_case(self) -> t.Optional[exp.Expression]: 5727 ifs
= [] 5728 default = None 5729 5730 comments = self._prev_comments 5731 expression = self._parse_assignment() 5732 5733 while self._match(TokenType.WHEN): 5734 this = self._parse_assignment() 5735 self._match(TokenType.THEN) 5736 then = self._parse_assignment() 5737 ifs.append(self.expression(exp.If, this=this, true=then)) 5738 5739 if self._match(TokenType.ELSE): 5740 default = self._parse_assignment() 5741 5742 if not self._match(TokenType.END): 5743 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5744 default = exp.column("interval") 5745 else: 5746 self.raise_error("Expected END after CASE", self._prev) 5747 5748 return self.expression( 5749 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5750 ) 5751 5752 def _parse_if(self) -> t.Optional[exp.Expression]: 5753 if self._match(TokenType.L_PAREN): 5754 args = self._parse_csv(self._parse_assignment) 5755 this = self.validate_expression(exp.If.from_arg_list(args), args) 5756 self._match_r_paren() 5757 else: 5758 index = self._index - 1 5759 5760 if self.NO_PAREN_IF_COMMANDS and index == 0: 5761 return self._parse_as_command(self._prev) 5762 5763 condition = self._parse_assignment() 5764 5765 if not condition: 5766 self._retreat(index) 5767 return None 5768 5769 self._match(TokenType.THEN) 5770 true = self._parse_assignment() 5771 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5772 self._match(TokenType.END) 5773 this = self.expression(exp.If, this=condition, true=true, false=false) 5774 5775 return this 5776 5777 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5778 if not self._match_text_seq("VALUE", "FOR"): 5779 self._retreat(self._index - 1) 5780 return None 5781 5782 return self.expression( 5783 exp.NextValueFor, 5784 this=self._parse_column(), 5785 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5786 ) 5787 5788 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5789 this = self._parse_function() or self._parse_var_or_string(upper=True) 5790 5791 if self._match(TokenType.FROM): 5792 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5793 5794 if not self._match(TokenType.COMMA): 5795 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5796 5797 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5798 5799 def _parse_gap_fill(self) -> exp.GapFill: 5800 self._match(TokenType.TABLE) 5801 this = self._parse_table() 5802 5803 self._match(TokenType.COMMA) 5804 args = [this, *self._parse_csv(self._parse_lambda)] 5805 5806 gap_fill = exp.GapFill.from_arg_list(args) 5807 return self.validate_expression(gap_fill, args) 5808 5809 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5810 this = self._parse_assignment() 5811 5812 if not self._match(TokenType.ALIAS): 5813 if self._match(TokenType.COMMA): 5814 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5815 5816 self.raise_error("Expected AS after CAST") 5817 5818 fmt = None 5819 to = self._parse_types() 5820 5821 if self._match(TokenType.FORMAT): 5822 fmt_string = self._parse_string() 5823 fmt = self._parse_at_time_zone(fmt_string) 5824 5825 if not to: 5826 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5827 if to.this in exp.DataType.TEMPORAL_TYPES: 5828 this = self.expression( 5829 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5830 this=this, 5831 format=exp.Literal.string( 5832 format_time( 5833 
fmt_string.this if fmt_string else "", 5834 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5835 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5836 ) 5837 ), 5838 safe=safe, 5839 ) 5840 5841 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5842 this.set("zone", fmt.args["zone"]) 5843 return this 5844 elif not to: 5845 self.raise_error("Expected TYPE after CAST") 5846 elif isinstance(to, exp.Identifier): 5847 to = exp.DataType.build(to.name, udt=True) 5848 elif to.this == exp.DataType.Type.CHAR: 5849 if self._match(TokenType.CHARACTER_SET): 5850 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5851 5852 return self.expression( 5853 exp.Cast if strict else exp.TryCast, 5854 this=this, 5855 to=to, 5856 format=fmt, 5857 safe=safe, 5858 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5859 ) 5860 5861 def _parse_string_agg(self) -> exp.Expression: 5862 if self._match(TokenType.DISTINCT): 5863 args: t.List[t.Optional[exp.Expression]] = [ 5864 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5865 ] 5866 if self._match(TokenType.COMMA): 5867 args.extend(self._parse_csv(self._parse_assignment)) 5868 else: 5869 args = self._parse_csv(self._parse_assignment) # type: ignore 5870 5871 index = self._index 5872 if not self._match(TokenType.R_PAREN) and args: 5873 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5874 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5875 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5876 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5877 5878 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5879 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5880 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5881 if not self._match_text_seq("WITHIN", "GROUP"): 5882 self._retreat(index) 5883 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5884 5885 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5886 order = self._parse_order(this=seq_get(args, 0)) 5887 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5888 5889 def _parse_convert( 5890 self, strict: bool, safe: t.Optional[bool] = None 5891 ) -> t.Optional[exp.Expression]: 5892 this = self._parse_bitwise() 5893 5894 if self._match(TokenType.USING): 5895 to: t.Optional[exp.Expression] = self.expression( 5896 exp.CharacterSet, this=self._parse_var() 5897 ) 5898 elif self._match(TokenType.COMMA): 5899 to = self._parse_types() 5900 else: 5901 to = None 5902 5903 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5904 5905 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5906 """ 5907 There are generally two variants of the DECODE function: 5908 5909 - DECODE(bin, charset) 5910 - DECODE(expression, search, result [, search, result] ... [, default]) 5911 5912 The second variant will always be parsed into a CASE expression. Note that NULL 5913 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5914 instead of relying on pattern matching. 
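        For example (an illustrative rewrite that follows the logic below),
        DECODE(x, 1, 'one', NULL, 'missing', 'other') is parsed as:

            CASE
                WHEN x = 1 THEN 'one'
                WHEN x IS NULL THEN 'missing'
                ELSE 'other'
            END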
5915 """ 5916 args = self._parse_csv(self._parse_assignment) 5917 5918 if len(args) < 3: 5919 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5920 5921 expression, *expressions = args 5922 if not expression: 5923 return None 5924 5925 ifs = [] 5926 for search, result in zip(expressions[::2], expressions[1::2]): 5927 if not search or not result: 5928 return None 5929 5930 if isinstance(search, exp.Literal): 5931 ifs.append( 5932 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5933 ) 5934 elif isinstance(search, exp.Null): 5935 ifs.append( 5936 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5937 ) 5938 else: 5939 cond = exp.or_( 5940 exp.EQ(this=expression.copy(), expression=search), 5941 exp.and_( 5942 exp.Is(this=expression.copy(), expression=exp.Null()), 5943 exp.Is(this=search.copy(), expression=exp.Null()), 5944 copy=False, 5945 ), 5946 copy=False, 5947 ) 5948 ifs.append(exp.If(this=cond, true=result)) 5949 5950 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5951 5952 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5953 self._match_text_seq("KEY") 5954 key = self._parse_column() 5955 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5956 self._match_text_seq("VALUE") 5957 value = self._parse_bitwise() 5958 5959 if not key and not value: 5960 return None 5961 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5962 5963 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5964 if not this or not self._match_text_seq("FORMAT", "JSON"): 5965 return this 5966 5967 return self.expression(exp.FormatJson, this=this) 5968 5969 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 5970 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 5971 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 5972 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5973 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5974 else: 5975 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5976 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5977 5978 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 5979 5980 if not empty and not error and not null: 5981 return None 5982 5983 return self.expression( 5984 exp.OnCondition, 5985 empty=empty, 5986 error=error, 5987 null=null, 5988 ) 5989 5990 def _parse_on_handling( 5991 self, on: str, *values: str 5992 ) -> t.Optional[str] | t.Optional[exp.Expression]: 5993 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 5994 for value in values: 5995 if self._match_text_seq(value, "ON", on): 5996 return f"{value} ON {on}" 5997 5998 index = self._index 5999 if self._match(TokenType.DEFAULT): 6000 default_value = self._parse_bitwise() 6001 if self._match_text_seq("ON", on): 6002 return default_value 6003 6004 self._retreat(index) 6005 6006 return None 6007 6008 @t.overload 6009 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6010 6011 @t.overload 6012 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6013 6014 def _parse_json_object(self, agg=False): 6015 star = self._parse_star() 6016 expressions = ( 6017 [star] 6018 if star 6019 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6020 ) 6021 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6022 6023 unique_keys = None 6024 if self._match_text_seq("WITH", "UNIQUE"): 6025 unique_keys = True 6026 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6027 unique_keys = False 6028 6029 self._match_text_seq("KEYS") 6030 6031 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6032 self._parse_type() 6033 ) 6034 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6035 6036 return self.expression( 6037 exp.JSONObjectAgg if agg else exp.JSONObject, 6038 expressions=expressions, 6039 null_handling=null_handling, 6040 unique_keys=unique_keys, 6041 return_type=return_type, 6042 encoding=encoding, 6043 ) 6044 6045 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6046 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6047 if not self._match_text_seq("NESTED"): 6048 this = self._parse_id_var() 6049 kind = self._parse_types(allow_identifiers=False) 6050 nested = None 6051 else: 6052 this = None 6053 kind = None 6054 nested = True 6055 6056 path = self._match_text_seq("PATH") and self._parse_string() 6057 nested_schema = nested and self._parse_json_schema() 6058 6059 return self.expression( 6060 exp.JSONColumnDef, 6061 this=this, 6062 kind=kind, 6063 path=path, 6064 nested_schema=nested_schema, 6065 ) 6066 6067 def _parse_json_schema(self) -> exp.JSONSchema: 6068 self._match_text_seq("COLUMNS") 6069 return self.expression( 6070 exp.JSONSchema, 6071 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6072 ) 6073 6074 def _parse_json_table(self) -> exp.JSONTable: 6075 this = self._parse_format_json(self._parse_bitwise()) 6076 path = self._match(TokenType.COMMA) and self._parse_string() 6077 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6078 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6079 schema = self._parse_json_schema() 6080 6081 return exp.JSONTable( 6082 this=this, 6083 schema=schema, 6084 path=path, 6085 error_handling=error_handling, 6086 empty_handling=empty_handling, 6087 ) 6088 6089 def _parse_match_against(self) -> exp.MatchAgainst: 6090 expressions = self._parse_csv(self._parse_column) 6091 6092 self._match_text_seq(")", "AGAINST", "(") 6093 6094 this = self._parse_string() 6095 6096 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6097 modifier = "IN NATURAL LANGUAGE MODE" 6098 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6099 modifier = f"{modifier} WITH QUERY EXPANSION" 6100 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6101 modifier = "IN BOOLEAN MODE" 6102 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6103 modifier = "WITH QUERY EXPANSION" 6104 else: 6105 modifier = None 6106 6107 return self.expression( 6108 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6109 ) 6110 6111 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6112 def _parse_open_json(self) -> exp.OpenJSON: 6113 this = self._parse_bitwise() 6114 path = self._match(TokenType.COMMA) and self._parse_string() 6115 6116 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6117 this = self._parse_field(any_token=True) 6118 kind = self._parse_types() 6119 path = 
self._parse_string() 6120 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6121 6122 return self.expression( 6123 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6124 ) 6125 6126 expressions = None 6127 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6128 self._match_l_paren() 6129 expressions = self._parse_csv(_parse_open_json_column_def) 6130 6131 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6132 6133 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6134 args = self._parse_csv(self._parse_bitwise) 6135 6136 if self._match(TokenType.IN): 6137 return self.expression( 6138 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6139 ) 6140 6141 if haystack_first: 6142 haystack = seq_get(args, 0) 6143 needle = seq_get(args, 1) 6144 else: 6145 needle = seq_get(args, 0) 6146 haystack = seq_get(args, 1) 6147 6148 return self.expression( 6149 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6150 ) 6151 6152 def _parse_predict(self) -> exp.Predict: 6153 self._match_text_seq("MODEL") 6154 this = self._parse_table() 6155 6156 self._match(TokenType.COMMA) 6157 self._match_text_seq("TABLE") 6158 6159 return self.expression( 6160 exp.Predict, 6161 this=this, 6162 expression=self._parse_table(), 6163 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6164 ) 6165 6166 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6167 args = self._parse_csv(self._parse_table) 6168 return exp.JoinHint(this=func_name.upper(), expressions=args) 6169 6170 def _parse_substring(self) -> exp.Substring: 6171 # Postgres supports the form: substring(string [from int] [for int]) 6172 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6173 6174 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6175 6176 if self._match(TokenType.FROM): 6177 args.append(self._parse_bitwise()) 6178 if self._match(TokenType.FOR): 6179 if len(args) == 1: 6180 args.append(exp.Literal.number(1)) 6181 args.append(self._parse_bitwise()) 6182 6183 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6184 6185 def _parse_trim(self) -> exp.Trim: 6186 # https://www.w3resource.com/sql/character-functions/trim.php 6187 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6188 6189 position = None 6190 collation = None 6191 expression = None 6192 6193 if self._match_texts(self.TRIM_TYPES): 6194 position = self._prev.text.upper() 6195 6196 this = self._parse_bitwise() 6197 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6198 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6199 expression = self._parse_bitwise() 6200 6201 if invert_order: 6202 this, expression = expression, this 6203 6204 if self._match(TokenType.COLLATE): 6205 collation = self._parse_bitwise() 6206 6207 return self.expression( 6208 exp.Trim, this=this, position=position, expression=expression, collation=collation 6209 ) 6210 6211 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6212 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6213 6214 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6215 return self._parse_window(self._parse_id_var(), alias=True) 6216 6217 def _parse_respect_or_ignore_nulls( 6218 self, this: t.Optional[exp.Expression] 6219 ) -> t.Optional[exp.Expression]: 6220 if self._match_text_seq("IGNORE", "NULLS"): 
6221 return self.expression(exp.IgnoreNulls, this=this) 6222 if self._match_text_seq("RESPECT", "NULLS"): 6223 return self.expression(exp.RespectNulls, this=this) 6224 return this 6225 6226 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6227 if self._match(TokenType.HAVING): 6228 self._match_texts(("MAX", "MIN")) 6229 max = self._prev.text.upper() != "MIN" 6230 return self.expression( 6231 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6232 ) 6233 6234 return this 6235 6236 def _parse_window( 6237 self, this: t.Optional[exp.Expression], alias: bool = False 6238 ) -> t.Optional[exp.Expression]: 6239 func = this 6240 comments = func.comments if isinstance(func, exp.Expression) else None 6241 6242 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6243 self._match(TokenType.WHERE) 6244 this = self.expression( 6245 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6246 ) 6247 self._match_r_paren() 6248 6249 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6250 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6251 if self._match_text_seq("WITHIN", "GROUP"): 6252 order = self._parse_wrapped(self._parse_order) 6253 this = self.expression(exp.WithinGroup, this=this, expression=order) 6254 6255 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER. 6256 # Some dialects choose to implement it and some do not. 6257 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6258 6259 # There is some code above in _parse_lambda that handles 6260 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6261 6262 # The code below handles 6263 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6264 6265 # Oracle allows both formats 6266 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6267 # and Snowflake chose to do the same for familiarity 6268 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6269 if isinstance(this, exp.AggFunc): 6270 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6271 6272 if ignore_respect and ignore_respect is not this: 6273 ignore_respect.replace(ignore_respect.this) 6274 this = self.expression(ignore_respect.__class__, this=this) 6275 6276 this = self._parse_respect_or_ignore_nulls(this) 6277 6278 # BigQuery: SELECT ... FROM ... WINDOW x AS (PARTITION BY ...)
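# e.g. SELECT ROW_NUMBER() OVER w FROM t WINDOW w AS (PARTITION BY a ORDER BY b);
# the alias=True branch below parses the named window `w AS (...)` itself.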
6279 if alias: 6280 over = None 6281 self._match(TokenType.ALIAS) 6282 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6283 return this 6284 else: 6285 over = self._prev.text.upper() 6286 6287 if comments and isinstance(func, exp.Expression): 6288 func.pop_comments() 6289 6290 if not self._match(TokenType.L_PAREN): 6291 return self.expression( 6292 exp.Window, 6293 comments=comments, 6294 this=this, 6295 alias=self._parse_id_var(False), 6296 over=over, 6297 ) 6298 6299 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6300 6301 first = self._match(TokenType.FIRST) 6302 if self._match_text_seq("LAST"): 6303 first = False 6304 6305 partition, order = self._parse_partition_and_order() 6306 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6307 6308 if kind: 6309 self._match(TokenType.BETWEEN) 6310 start = self._parse_window_spec() 6311 self._match(TokenType.AND) 6312 end = self._parse_window_spec() 6313 6314 spec = self.expression( 6315 exp.WindowSpec, 6316 kind=kind, 6317 start=start["value"], 6318 start_side=start["side"], 6319 end=end["value"], 6320 end_side=end["side"], 6321 ) 6322 else: 6323 spec = None 6324 6325 self._match_r_paren() 6326 6327 window = self.expression( 6328 exp.Window, 6329 comments=comments, 6330 this=this, 6331 partition_by=partition, 6332 order=order, 6333 spec=spec, 6334 alias=window_alias, 6335 over=over, 6336 first=first, 6337 ) 6338 6339 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6340 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6341 return self._parse_window(window, alias=alias) 6342 6343 return window 6344 6345 def _parse_partition_and_order( 6346 self, 6347 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6348 return self._parse_partition_by(), self._parse_order() 6349 6350 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6351 self._match(TokenType.BETWEEN) 6352 6353 return { 6354 "value": ( 6355 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6356 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6357 or self._parse_bitwise() 6358 ), 6359 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6360 } 6361 6362 def _parse_alias( 6363 self, this: t.Optional[exp.Expression], explicit: bool = False 6364 ) -> t.Optional[exp.Expression]: 6365 any_token = self._match(TokenType.ALIAS) 6366 comments = self._prev_comments or [] 6367 6368 if explicit and not any_token: 6369 return this 6370 6371 if self._match(TokenType.L_PAREN): 6372 aliases = self.expression( 6373 exp.Aliases, 6374 comments=comments, 6375 this=this, 6376 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6377 ) 6378 self._match_r_paren(aliases) 6379 return aliases 6380 6381 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6382 self.STRING_ALIASES and self._parse_string_as_identifier() 6383 ) 6384 6385 if alias: 6386 comments.extend(alias.pop_comments()) 6387 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6388 column = this.this 6389 6390 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6391 if not this.comments and column and column.comments: 6392 this.comments = column.pop_comments() 6393 6394 return this 6395 6396 def _parse_id_var( 6397 self, 6398 any_token: bool = True, 6399 tokens: t.Optional[t.Collection[TokenType]] = None, 6400 ) -> t.Optional[exp.Expression]: 6401 expression = self._parse_identifier() 6402 if 
not expression and ( 6403 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6404 ): 6405 quoted = self._prev.token_type == TokenType.STRING 6406 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6407 6408 return expression 6409 6410 def _parse_string(self) -> t.Optional[exp.Expression]: 6411 if self._match_set(self.STRING_PARSERS): 6412 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6413 return self._parse_placeholder() 6414 6415 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6416 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6417 6418 def _parse_number(self) -> t.Optional[exp.Expression]: 6419 if self._match_set(self.NUMERIC_PARSERS): 6420 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6421 return self._parse_placeholder() 6422 6423 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6424 if self._match(TokenType.IDENTIFIER): 6425 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6426 return self._parse_placeholder() 6427 6428 def _parse_var( 6429 self, 6430 any_token: bool = False, 6431 tokens: t.Optional[t.Collection[TokenType]] = None, 6432 upper: bool = False, 6433 ) -> t.Optional[exp.Expression]: 6434 if ( 6435 (any_token and self._advance_any()) 6436 or self._match(TokenType.VAR) 6437 or (self._match_set(tokens) if tokens else False) 6438 ): 6439 return self.expression( 6440 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6441 ) 6442 return self._parse_placeholder() 6443 6444 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6445 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6446 self._advance() 6447 return self._prev 6448 return None 6449 6450 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6451 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6452 6453 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6454 return self._parse_primary() or self._parse_var(any_token=True) 6455 6456 def _parse_null(self) -> t.Optional[exp.Expression]: 6457 if self._match_set(self.NULL_TOKENS): 6458 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6459 return self._parse_placeholder() 6460 6461 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6462 if self._match(TokenType.TRUE): 6463 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6464 if self._match(TokenType.FALSE): 6465 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6466 return self._parse_placeholder() 6467 6468 def _parse_star(self) -> t.Optional[exp.Expression]: 6469 if self._match(TokenType.STAR): 6470 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6471 return self._parse_placeholder() 6472 6473 def _parse_parameter(self) -> exp.Parameter: 6474 this = self._parse_identifier() or self._parse_primary_or_var() 6475 return self.expression(exp.Parameter, this=this) 6476 6477 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6478 if self._match_set(self.PLACEHOLDER_PARSERS): 6479 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6480 if placeholder: 6481 return placeholder 6482 self._advance(-1) 6483 return None 6484 6485 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6486 if not self._match_texts(keywords): 6487 return None 6488 if self._match(TokenType.L_PAREN, 
advance=False): 6489 return self._parse_wrapped_csv(self._parse_expression) 6490 6491 expression = self._parse_expression() 6492 return [expression] if expression else None 6493 6494 def _parse_csv( 6495 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6496 ) -> t.List[exp.Expression]: 6497 parse_result = parse_method() 6498 items = [parse_result] if parse_result is not None else [] 6499 6500 while self._match(sep): 6501 self._add_comments(parse_result) 6502 parse_result = parse_method() 6503 if parse_result is not None: 6504 items.append(parse_result) 6505 6506 return items 6507 6508 def _parse_tokens( 6509 self, parse_method: t.Callable, expressions: t.Dict 6510 ) -> t.Optional[exp.Expression]: 6511 this = parse_method() 6512 6513 while self._match_set(expressions): 6514 this = self.expression( 6515 expressions[self._prev.token_type], 6516 this=this, 6517 comments=self._prev_comments, 6518 expression=parse_method(), 6519 ) 6520 6521 return this 6522 6523 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6524 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6525 6526 def _parse_wrapped_csv( 6527 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6528 ) -> t.List[exp.Expression]: 6529 return self._parse_wrapped( 6530 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6531 ) 6532 6533 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6534 wrapped = self._match(TokenType.L_PAREN) 6535 if not wrapped and not optional: 6536 self.raise_error("Expecting (") 6537 parse_result = parse_method() 6538 if wrapped: 6539 self._match_r_paren() 6540 return parse_result 6541 6542 def _parse_expressions(self) -> t.List[exp.Expression]: 6543 return self._parse_csv(self._parse_expression) 6544 6545 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6546 return self._parse_select() or self._parse_set_operations( 6547 self._parse_expression() if alias else self._parse_assignment() 6548 ) 6549 6550 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6551 return self._parse_query_modifiers( 6552 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6553 ) 6554 6555 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6556 this = None 6557 if self._match_texts(self.TRANSACTION_KIND): 6558 this = self._prev.text 6559 6560 self._match_texts(("TRANSACTION", "WORK")) 6561 6562 modes = [] 6563 while True: 6564 mode = [] 6565 while self._match(TokenType.VAR): 6566 mode.append(self._prev.text) 6567 6568 if mode: 6569 modes.append(" ".join(mode)) 6570 if not self._match(TokenType.COMMA): 6571 break 6572 6573 return self.expression(exp.Transaction, this=this, modes=modes) 6574 6575 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6576 chain = None 6577 savepoint = None 6578 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6579 6580 self._match_texts(("TRANSACTION", "WORK")) 6581 6582 if self._match_text_seq("TO"): 6583 self._match_text_seq("SAVEPOINT") 6584 savepoint = self._parse_id_var() 6585 6586 if self._match(TokenType.AND): 6587 chain = not self._match_text_seq("NO") 6588 self._match_text_seq("CHAIN") 6589 6590 if is_rollback: 6591 return self.expression(exp.Rollback, savepoint=savepoint) 6592 6593 return self.expression(exp.Commit, chain=chain) 6594 6595 def _parse_refresh(self) -> exp.Refresh: 6596 self._match(TokenType.TABLE) 6597 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6598 6599 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6600 if not self._match_text_seq("ADD"): 6601 return None 6602 6603 self._match(TokenType.COLUMN) 6604 exists_column = self._parse_exists(not_=True) 6605 expression = self._parse_field_def() 6606 6607 if expression: 6608 expression.set("exists", exists_column) 6609 6610 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6611 if self._match_texts(("FIRST", "AFTER")): 6612 position = self._prev.text 6613 column_position = self.expression( 6614 exp.ColumnPosition, this=self._parse_column(), position=position 6615 ) 6616 expression.set("position", column_position) 6617 6618 return expression 6619 6620 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6621 drop = self._match(TokenType.DROP) and self._parse_drop() 6622 if drop and not isinstance(drop, exp.Command): 6623 drop.set("kind", drop.args.get("kind", "COLUMN")) 6624 return drop 6625 6626 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6627 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6628 return self.expression( 6629 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6630 ) 6631 6632 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6633 index = self._index - 1 6634 6635 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6636 return self._parse_csv( 6637 lambda: self.expression( 6638 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6639 ) 6640 ) 6641 6642 self._retreat(index) 6643 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6644 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6645 6646 if self._match_text_seq("ADD", "COLUMNS"): 6647 schema = self._parse_schema() 6648 if schema: 6649 return [schema] 6650 return [] 6651 6652 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6653 6654 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6655 if self._match_texts(self.ALTER_ALTER_PARSERS): 6656 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6657 6658 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6659 # keyword after ALTER we default to parsing this statement 6660 self._match(TokenType.COLUMN) 6661 column = self._parse_field(any_token=True) 6662 6663 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6664 return self.expression(exp.AlterColumn, this=column, drop=True) 6665 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6666 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6667 if self._match(TokenType.COMMENT): 6668 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6669 if self._match_text_seq("DROP", "NOT", "NULL"): 6670 return self.expression( 6671 exp.AlterColumn, 6672 this=column, 6673 drop=True, 6674 allow_null=True, 6675 ) 6676 if self._match_text_seq("SET", "NOT", "NULL"): 6677 return self.expression( 6678 exp.AlterColumn, 6679 this=column, 6680 allow_null=False, 6681 ) 6682 self._match_text_seq("SET", "DATA") 6683 self._match_text_seq("TYPE") 6684 return self.expression( 6685 exp.AlterColumn, 6686 this=column, 6687 dtype=self._parse_types(), 6688 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6689 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6690 ) 6691 6692 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6693 if self._match_texts(("ALL", "EVEN", "AUTO")): 6694 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6695 6696 self._match_text_seq("KEY", "DISTKEY") 6697 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6698 6699 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6700 if compound: 6701 self._match_text_seq("SORTKEY") 6702 6703 if self._match(TokenType.L_PAREN, advance=False): 6704 return self.expression( 6705 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6706 ) 6707 6708 self._match_texts(("AUTO", "NONE")) 6709 return self.expression( 6710 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6711 ) 6712 6713 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6714 index = self._index - 1 6715 6716 partition_exists = self._parse_exists() 6717 if self._match(TokenType.PARTITION, advance=False): 6718 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6719 6720 self._retreat(index) 6721 return self._parse_csv(self._parse_drop_column) 6722 6723 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6724 if self._match(TokenType.COLUMN): 6725 exists = self._parse_exists() 6726 old_column = self._parse_column() 6727 to = self._match_text_seq("TO") 6728 new_column = self._parse_column() 6729 6730 if old_column is None or to is None or new_column is None: 6731 return None 6732 6733 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6734 6735 self._match_text_seq("TO") 6736 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6737 6738 def _parse_alter_table_set(self) -> exp.AlterSet: 6739 alter_set = self.expression(exp.AlterSet) 6740 6741 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6742 "TABLE", "PROPERTIES" 6743 ): 6744 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6745 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6746 alter_set.set("expressions", [self._parse_assignment()]) 6747 elif self._match_texts(("LOGGED", "UNLOGGED")): 6748 alter_set.set("option", exp.var(self._prev.text.upper())) 6749 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6750 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6751 elif self._match_text_seq("LOCATION"): 6752 alter_set.set("location", self._parse_field()) 6753 elif self._match_text_seq("ACCESS", "METHOD"): 6754 alter_set.set("access_method", self._parse_field()) 6755 elif self._match_text_seq("TABLESPACE"): 6756 alter_set.set("tablespace", self._parse_field()) 6757 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6758 alter_set.set("file_format", [self._parse_field()]) 6759 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6760 alter_set.set("file_format", self._parse_wrapped_options()) 6761 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6762 alter_set.set("copy_options", self._parse_wrapped_options()) 6763 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6764 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6765 else: 6766 if self._match_text_seq("SERDE"): 6767 alter_set.set("serde", self._parse_field()) 6768 6769 alter_set.set("expressions", [self._parse_properties()]) 6770 6771 return 
alter_set 6772 6773 def _parse_alter(self) -> exp.Alter | exp.Command: 6774 start = self._prev 6775 6776 alter_token = self._match_set(self.ALTERABLES) and self._prev 6777 if not alter_token: 6778 return self._parse_as_command(start) 6779 6780 exists = self._parse_exists() 6781 only = self._match_text_seq("ONLY") 6782 this = self._parse_table(schema=True) 6783 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6784 6785 if self._next: 6786 self._advance() 6787 6788 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6789 if parser: 6790 actions = ensure_list(parser(self)) 6791 not_valid = self._match_text_seq("NOT", "VALID") 6792 options = self._parse_csv(self._parse_property) 6793 6794 if not self._curr and actions: 6795 return self.expression( 6796 exp.Alter, 6797 this=this, 6798 kind=alter_token.text.upper(), 6799 exists=exists, 6800 actions=actions, 6801 only=only, 6802 options=options, 6803 cluster=cluster, 6804 not_valid=not_valid, 6805 ) 6806 6807 return self._parse_as_command(start) 6808 6809 def _parse_merge(self) -> exp.Merge: 6810 self._match(TokenType.INTO) 6811 target = self._parse_table() 6812 6813 if target and self._match(TokenType.ALIAS, advance=False): 6814 target.set("alias", self._parse_table_alias()) 6815 6816 self._match(TokenType.USING) 6817 using = self._parse_table() 6818 6819 self._match(TokenType.ON) 6820 on = self._parse_assignment() 6821 6822 return self.expression( 6823 exp.Merge, 6824 this=target, 6825 using=using, 6826 on=on, 6827 expressions=self._parse_when_matched(), 6828 ) 6829 6830 def _parse_when_matched(self) -> t.List[exp.When]: 6831 whens = [] 6832 6833 while self._match(TokenType.WHEN): 6834 matched = not self._match(TokenType.NOT) 6835 self._match_text_seq("MATCHED") 6836 source = ( 6837 False 6838 if self._match_text_seq("BY", "TARGET") 6839 else self._match_text_seq("BY", "SOURCE") 6840 ) 6841 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6842 6843 self._match(TokenType.THEN) 6844 6845 if self._match(TokenType.INSERT): 6846 _this = self._parse_star() 6847 if _this: 6848 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6849 else: 6850 then = self.expression( 6851 exp.Insert, 6852 this=self._parse_value(), 6853 expression=self._match_text_seq("VALUES") and self._parse_value(), 6854 ) 6855 elif self._match(TokenType.UPDATE): 6856 expressions = self._parse_star() 6857 if expressions: 6858 then = self.expression(exp.Update, expressions=expressions) 6859 else: 6860 then = self.expression( 6861 exp.Update, 6862 expressions=self._match(TokenType.SET) 6863 and self._parse_csv(self._parse_equality), 6864 ) 6865 elif self._match(TokenType.DELETE): 6866 then = self.expression(exp.Var, this=self._prev.text) 6867 else: 6868 then = None 6869 6870 whens.append( 6871 self.expression( 6872 exp.When, 6873 matched=matched, 6874 source=source, 6875 condition=condition, 6876 then=then, 6877 ) 6878 ) 6879 return whens 6880 6881 def _parse_show(self) -> t.Optional[exp.Expression]: 6882 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6883 if parser: 6884 return parser(self) 6885 return self._parse_as_command(self._prev) 6886 6887 def _parse_set_item_assignment( 6888 self, kind: t.Optional[str] = None 6889 ) -> t.Optional[exp.Expression]: 6890 index = self._index 6891 6892 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6893 return self._parse_set_transaction(global_=kind == "GLOBAL") 6894 6895 left = self._parse_primary() 
or self._parse_column() 6896 assignment_delimiter = self._match_texts(("=", "TO")) 6897 6898 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6899 self._retreat(index) 6900 return None 6901 6902 right = self._parse_statement() or self._parse_id_var() 6903 if isinstance(right, (exp.Column, exp.Identifier)): 6904 right = exp.var(right.name) 6905 6906 this = self.expression(exp.EQ, this=left, expression=right) 6907 return self.expression(exp.SetItem, this=this, kind=kind) 6908 6909 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6910 self._match_text_seq("TRANSACTION") 6911 characteristics = self._parse_csv( 6912 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6913 ) 6914 return self.expression( 6915 exp.SetItem, 6916 expressions=characteristics, 6917 kind="TRANSACTION", 6918 **{"global": global_}, # type: ignore 6919 ) 6920 6921 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6922 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6923 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6924 6925 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6926 index = self._index 6927 set_ = self.expression( 6928 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6929 ) 6930 6931 if self._curr: 6932 self._retreat(index) 6933 return self._parse_as_command(self._prev) 6934 6935 return set_ 6936 6937 def _parse_var_from_options( 6938 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6939 ) -> t.Optional[exp.Var]: 6940 start = self._curr 6941 if not start: 6942 return None 6943 6944 option = start.text.upper() 6945 continuations = options.get(option) 6946 6947 index = self._index 6948 self._advance() 6949 for keywords in continuations or []: 6950 if isinstance(keywords, str): 6951 keywords = (keywords,) 6952 6953 if self._match_text_seq(*keywords): 6954 option = f"{option} {' '.join(keywords)}" 6955 break 6956 else: 6957 if continuations or continuations is None: 6958 if raise_unmatched: 6959 self.raise_error(f"Unknown option {option}") 6960 6961 self._retreat(index) 6962 return None 6963 6964 return exp.var(option) 6965 6966 def _parse_as_command(self, start: Token) -> exp.Command: 6967 while self._curr: 6968 self._advance() 6969 text = self._find_sql(start, self._prev) 6970 size = len(start.text) 6971 self._warn_unsupported() 6972 return exp.Command(this=text[:size], expression=text[size:]) 6973 6974 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6975 settings = [] 6976 6977 self._match_l_paren() 6978 kind = self._parse_id_var() 6979 6980 if self._match(TokenType.L_PAREN): 6981 while True: 6982 key = self._parse_id_var() 6983 value = self._parse_primary() 6984 6985 if not key and value is None: 6986 break 6987 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6988 self._match(TokenType.R_PAREN) 6989 6990 self._match_r_paren() 6991 6992 return self.expression( 6993 exp.DictProperty, 6994 this=this, 6995 kind=kind.this if kind else None, 6996 settings=settings, 6997 ) 6998 6999 def _parse_dict_range(self, this: str) -> exp.DictRange: 7000 self._match_l_paren() 7001 has_min = self._match_text_seq("MIN") 7002 if has_min: 7003 min = self._parse_var() or self._parse_primary() 7004 self._match_text_seq("MAX") 7005 max = self._parse_var() or self._parse_primary() 7006 else: 7007 max = self._parse_var() or self._parse_primary() 7008 min = exp.Literal.number(0) 7009 
self._match_r_paren() 7010 return self.expression(exp.DictRange, this=this, min=min, max=max) 7011 7012 def _parse_comprehension( 7013 self, this: t.Optional[exp.Expression] 7014 ) -> t.Optional[exp.Comprehension]: 7015 index = self._index 7016 expression = self._parse_column() 7017 if not self._match(TokenType.IN): 7018 self._retreat(index - 1) 7019 return None 7020 iterator = self._parse_column() 7021 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7022 return self.expression( 7023 exp.Comprehension, 7024 this=this, 7025 expression=expression, 7026 iterator=iterator, 7027 condition=condition, 7028 ) 7029 7030 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7031 if self._match(TokenType.HEREDOC_STRING): 7032 return self.expression(exp.Heredoc, this=self._prev.text) 7033 7034 if not self._match_text_seq("$"): 7035 return None 7036 7037 tags = ["$"] 7038 tag_text = None 7039 7040 if self._is_connected(): 7041 self._advance() 7042 tags.append(self._prev.text.upper()) 7043 else: 7044 self.raise_error("No closing $ found") 7045 7046 if tags[-1] != "$": 7047 if self._is_connected() and self._match_text_seq("$"): 7048 tag_text = tags[-1] 7049 tags.append("$") 7050 else: 7051 self.raise_error("No closing $ found") 7052 7053 heredoc_start = self._curr 7054 7055 while self._curr: 7056 if self._match_text_seq(*tags, advance=False): 7057 this = self._find_sql(heredoc_start, self._prev) 7058 self._advance(len(tags)) 7059 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7060 7061 self._advance() 7062 7063 self.raise_error(f"No closing {''.join(tags)} found") 7064 return None 7065 7066 def _find_parser( 7067 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7068 ) -> t.Optional[t.Callable]: 7069 if not self._curr: 7070 return None 7071 7072 index = self._index 7073 this = [] 7074 while True: 7075 # The current token might be multiple words 7076 curr = self._curr.text.upper() 7077 key = curr.split(" ") 7078 this.append(curr) 7079 7080 self._advance() 7081 result, trie = in_trie(trie, key) 7082 if result == TrieResult.FAILED: 7083 break 7084 7085 if result == TrieResult.EXISTS: 7086 subparser = parsers[" ".join(this)] 7087 return subparser 7088 7089 self._retreat(index) 7090 return None 7091 7092 def _match(self, token_type, advance=True, expression=None): 7093 if not self._curr: 7094 return None 7095 7096 if self._curr.token_type == token_type: 7097 if advance: 7098 self._advance() 7099 self._add_comments(expression) 7100 return True 7101 7102 return None 7103 7104 def _match_set(self, types, advance=True): 7105 if not self._curr: 7106 return None 7107 7108 if self._curr.token_type in types: 7109 if advance: 7110 self._advance() 7111 return True 7112 7113 return None 7114 7115 def _match_pair(self, token_type_a, token_type_b, advance=True): 7116 if not self._curr or not self._next: 7117 return None 7118 7119 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7120 if advance: 7121 self._advance(2) 7122 return True 7123 7124 return None 7125 7126 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7127 if not self._match(TokenType.L_PAREN, expression=expression): 7128 self.raise_error("Expecting (") 7129 7130 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7131 if not self._match(TokenType.R_PAREN, expression=expression): 7132 self.raise_error("Expecting )") 7133 7134 def _match_texts(self, texts, advance=True): 7135 if ( 7136 self._curr 7137 and 
self._curr.token_type != TokenType.STRING 7138 and self._curr.text.upper() in texts 7139 ): 7140 if advance: 7141 self._advance() 7142 return True 7143 return None 7144 7145 def _match_text_seq(self, *texts, advance=True): 7146 index = self._index 7147 for text in texts: 7148 if ( 7149 self._curr 7150 and self._curr.token_type != TokenType.STRING 7151 and self._curr.text.upper() == text 7152 ): 7153 self._advance() 7154 else: 7155 self._retreat(index) 7156 return None 7157 7158 if not advance: 7159 self._retreat(index) 7160 7161 return True 7162 7163 def _replace_lambda( 7164 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7165 ) -> t.Optional[exp.Expression]: 7166 if not node: 7167 return node 7168 7169 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7170 7171 for column in node.find_all(exp.Column): 7172 typ = lambda_types.get(column.parts[0].name) 7173 if typ is not None: 7174 dot_or_id = column.to_dot() if column.table else column.this 7175 7176 if typ: 7177 dot_or_id = self.expression( 7178 exp.Cast, 7179 this=dot_or_id, 7180 to=typ, 7181 ) 7182 7183 parent = column.parent 7184 7185 while isinstance(parent, exp.Dot): 7186 if not isinstance(parent.parent, exp.Dot): 7187 parent.replace(dot_or_id) 7188 break 7189 parent = parent.parent 7190 else: 7191 if column is node: 7192 node = dot_or_id 7193 else: 7194 column.replace(dot_or_id) 7195 return node 7196 7197 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7198 start = self._prev 7199 7200 # Not to be confused with TRUNCATE(number, decimals) function call 7201 if self._match(TokenType.L_PAREN): 7202 self._retreat(self._index - 2) 7203 return self._parse_function() 7204 7205 # Clickhouse supports TRUNCATE DATABASE as well 7206 is_database = self._match(TokenType.DATABASE) 7207 7208 self._match(TokenType.TABLE) 7209 7210 exists = self._parse_exists(not_=False) 7211 7212 expressions = self._parse_csv( 7213 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7214 ) 7215 7216 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7217 7218 if self._match_text_seq("RESTART", "IDENTITY"): 7219 identity = "RESTART" 7220 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7221 identity = "CONTINUE" 7222 else: 7223 identity = None 7224 7225 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7226 option = self._prev.text 7227 else: 7228 option = None 7229 7230 partition = self._parse_partition() 7231 7232 # Fallback case 7233 if self._curr: 7234 return self._parse_as_command(start) 7235 7236 return self.expression( 7237 exp.TruncateTable, 7238 expressions=expressions, 7239 is_database=is_database, 7240 exists=exists, 7241 cluster=cluster, 7242 identity=identity, 7243 option=option, 7244 partition=partition, 7245 ) 7246 7247 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7248 this = self._parse_ordered(self._parse_opclass) 7249 7250 if not self._match(TokenType.WITH): 7251 return this 7252 7253 op = self._parse_var(any_token=True) 7254 7255 return self.expression(exp.WithOperator, this=this, op=op) 7256 7257 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7258 self._match(TokenType.EQ) 7259 self._match(TokenType.L_PAREN) 7260 7261 opts: t.List[t.Optional[exp.Expression]] = [] 7262 while self._curr and not self._match(TokenType.R_PAREN): 7263 if self._match_text_seq("FORMAT_NAME", "="): 7264 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7265 # so 
we parse it separately to use _parse_field() 7266 prop = self.expression( 7267 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7268 ) 7269 opts.append(prop) 7270 else: 7271 opts.append(self._parse_property()) 7272 7273 self._match(TokenType.COMMA) 7274 7275 return opts 7276 7277 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7278 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7279 7280 options = [] 7281 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7282 option = self._parse_var(any_token=True) 7283 prev = self._prev.text.upper() 7284 7285 # Different dialects might separate options and values by white space, "=" and "AS" 7286 self._match(TokenType.EQ) 7287 self._match(TokenType.ALIAS) 7288 7289 param = self.expression(exp.CopyParameter, this=option) 7290 7291 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7292 TokenType.L_PAREN, advance=False 7293 ): 7294 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7295 param.set("expressions", self._parse_wrapped_options()) 7296 elif prev == "FILE_FORMAT": 7297 # T-SQL's external file format case 7298 param.set("expression", self._parse_field()) 7299 else: 7300 param.set("expression", self._parse_unquoted_field()) 7301 7302 options.append(param) 7303 self._match(sep) 7304 7305 return options 7306 7307 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7308 expr = self.expression(exp.Credentials) 7309 7310 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7311 expr.set("storage", self._parse_field()) 7312 if self._match_text_seq("CREDENTIALS"): 7313 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7314 creds = ( 7315 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7316 ) 7317 expr.set("credentials", creds) 7318 if self._match_text_seq("ENCRYPTION"): 7319 expr.set("encryption", self._parse_wrapped_options()) 7320 if self._match_text_seq("IAM_ROLE"): 7321 expr.set("iam_role", self._parse_field()) 7322 if self._match_text_seq("REGION"): 7323 expr.set("region", self._parse_field()) 7324 7325 return expr 7326 7327 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7328 return self._parse_field() 7329 7330 def _parse_copy(self) -> exp.Copy | exp.Command: 7331 start = self._prev 7332 7333 self._match(TokenType.INTO) 7334 7335 this = ( 7336 self._parse_select(nested=True, parse_subquery_alias=False) 7337 if self._match(TokenType.L_PAREN, advance=False) 7338 else self._parse_table(schema=True) 7339 ) 7340 7341 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7342 7343 files = self._parse_csv(self._parse_file_location) 7344 credentials = self._parse_credentials() 7345 7346 self._match_text_seq("WITH") 7347 7348 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7349 7350 # Fallback case 7351 if self._curr: 7352 return self._parse_as_command(start) 7353 7354 return self.expression( 7355 exp.Copy, 7356 this=this, 7357 kind=kind, 7358 credentials=credentials, 7359 files=files, 7360 params=params, 7361 ) 7362 7363 def _parse_normalize(self) -> exp.Normalize: 7364 return self.expression( 7365 exp.Normalize, 7366 this=self._parse_bitwise(), 7367 form=self._match(TokenType.COMMA) and self._parse_var(), 7368 )
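The statement parsers above share a defensive pattern: consume as much structure as possible, and if tokens remain, retreat and delegate to _parse_as_command, which wraps the raw SQL in an exp.Command node. For illustration (not part of the module source), a minimal sketch of the tree that _parse_merge and _parse_when_matched build in this version:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(ast, exp.Merge)
# one exp.When per WHEN branch, with `then` holding an exp.Update or exp.Insert
for when in ast.expressions:
    print(when.args.get("matched"), type(when.args.get("then")).__name__)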
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
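A hypothetical direct call (for illustration only) exercising both branches of build_var_map:

from sqlglot import exp
from sqlglot.parser import build_var_map

# an even-length key/value argument list is split into two parallel arrays
node = build_var_map(
    [exp.Literal.string("a"), exp.Literal.number(1),
     exp.Literal.string("b"), exp.Literal.number(2)]
)
assert isinstance(node, exp.VarMap)

# a lone star argument short-circuits into exp.StarMap
assert isinstance(build_var_map([exp.Star()]), exp.StarMap)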
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
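RANGE_PARSERS below wires this factory to concrete tokens, e.g. TokenType.LIKE: binary_range_parser(exp.Like). The trailing _parse_escape call is what attaches an optional ESCAPE clause; a sketch:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT x LIKE 'a!%' ESCAPE '!'")
escape = ast.find(exp.Escape)
# the exp.Like produced by the range parser is wrapped in exp.Escape
assert isinstance(escape.this, exp.Like)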
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
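A sketch of the argument-order handling, assuming the generic dialect (which keeps the default base-first order, i.e. LOG_BASE_FIRST is truthy):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_logarithm

node = build_logarithm([exp.Literal.number(10), exp.column("x")], Dialect())
# LOG(10, x); a dialect with LOG_BASE_FIRST = False would swap the operands
print(node.sql())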
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
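A sketch using a dialect that registers this builder for JSON_EXTRACT (MySQL is one such dialect); the raw path literal is normalized through dialect.to_json_path into a structured node rather than kept as a plain string:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a[0].b')", read="mysql")
node = ast.find(exp.JSONExtract)
print(type(node.expression).__name__)  # a parsed JSON path node, e.g. JSONPath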
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
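The exp.Paren wrap matters because exp.Mod renders as the % operator in many dialects, and % binds tighter than +; a sketch with the generic dialect:

import sqlglot

print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])  # SELECT (a + 1) % 7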
121def build_array_constructor( 122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 123) -> exp.Expression: 124 array_exp = exp_class(expressions=args) 125 126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 128 129 return array_exp
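A hypothetical direct call; the bracket_notation flag is only recorded for dialects whose HAS_DISTINCT_ARRAY_CONSTRUCTORS is set, so that ARRAY[...] and ARRAY(...) spellings can be round-tripped faithfully:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

node = build_array_constructor(
    exp.Array, [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET, Dialect()
)
print(node.sql())  # e.g. ARRAY(1, 2) under the generic generator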
132def build_convert_timezone( 133 args: t.List, default_source_tz: t.Optional[str] = None 134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 135 if len(args) == 2: 136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 137 return exp.ConvertTimezone( 138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 139 ) 140 141 return exp.ConvertTimezone.from_arg_list(args)
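A sketch of the two-argument form, where the dialect's implied source timezone (if any) is filled in as a string literal:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",  # hypothetical dialect default, for illustration
)
print(node.args["source_tz"])  # 'UTC'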
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 192 "CONCAT": lambda args, dialect: exp.Concat( 193 expressions=args, 194 safe=not dialect.STRICT_STRING_CONCAT, 195 coalesce=dialect.CONCAT_COALESCE, 196 ), 197 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 198 expressions=args, 199 safe=not dialect.STRICT_STRING_CONCAT, 200 coalesce=dialect.CONCAT_COALESCE, 201 ), 202 "CONVERT_TIMEZONE": build_convert_timezone, 203 "DATE_TO_DATE_STR": lambda args: exp.Cast( 204 this=seq_get(args, 0), 205 to=exp.DataType(this=exp.DataType.Type.TEXT), 206 ), 207 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 208 start=seq_get(args, 0), 209 end=seq_get(args, 1), 210 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 211 ), 212 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 213 "HEX": build_hex, 214 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 215 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 216 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 217 "LIKE": build_like, 218 "LOG": build_logarithm, 219 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 220 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 221 "LOWER": build_lower, 222 "LPAD": lambda args: build_pad(args), 223 "LEFTPAD": lambda args: build_pad(args), 224 "LTRIM": lambda args: build_trim(args), 225 "MOD": build_mod, 226 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 227 "RPAD": lambda args: build_pad(args, is_left=False), 228 "RTRIM": lambda args: build_trim(args, is_left=False), 229 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 230 if len(args) != 2 231 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 232 "TIME_TO_TIME_STR": lambda args: exp.Cast( 233 this=seq_get(args, 0), 234 to=exp.DataType(this=exp.DataType.Type.TEXT), 235 ), 236 "TO_HEX": build_hex, 237 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 238 this=exp.Cast( 239 this=seq_get(args, 0), 240 to=exp.DataType(this=exp.DataType.Type.TEXT), 241 ), 242 start=exp.Literal.number(1), 243 length=exp.Literal.number(10), 244 ), 245 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 246 "UPPER": build_upper, 
247 "VAR_MAP": build_var_map, 248 } 249 250 NO_PAREN_FUNCTIONS = { 251 TokenType.CURRENT_DATE: exp.CurrentDate, 252 TokenType.CURRENT_DATETIME: exp.CurrentDate, 253 TokenType.CURRENT_TIME: exp.CurrentTime, 254 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 255 TokenType.CURRENT_USER: exp.CurrentUser, 256 } 257 258 STRUCT_TYPE_TOKENS = { 259 TokenType.NESTED, 260 TokenType.OBJECT, 261 TokenType.STRUCT, 262 } 263 264 NESTED_TYPE_TOKENS = { 265 TokenType.ARRAY, 266 TokenType.LIST, 267 TokenType.LOWCARDINALITY, 268 TokenType.MAP, 269 TokenType.NULLABLE, 270 *STRUCT_TYPE_TOKENS, 271 } 272 273 ENUM_TYPE_TOKENS = { 274 TokenType.ENUM, 275 TokenType.ENUM8, 276 TokenType.ENUM16, 277 } 278 279 AGGREGATE_TYPE_TOKENS = { 280 TokenType.AGGREGATEFUNCTION, 281 TokenType.SIMPLEAGGREGATEFUNCTION, 282 } 283 284 TYPE_TOKENS = { 285 TokenType.BIT, 286 TokenType.BOOLEAN, 287 TokenType.TINYINT, 288 TokenType.UTINYINT, 289 TokenType.SMALLINT, 290 TokenType.USMALLINT, 291 TokenType.INT, 292 TokenType.UINT, 293 TokenType.BIGINT, 294 TokenType.UBIGINT, 295 TokenType.INT128, 296 TokenType.UINT128, 297 TokenType.INT256, 298 TokenType.UINT256, 299 TokenType.MEDIUMINT, 300 TokenType.UMEDIUMINT, 301 TokenType.FIXEDSTRING, 302 TokenType.FLOAT, 303 TokenType.DOUBLE, 304 TokenType.CHAR, 305 TokenType.NCHAR, 306 TokenType.VARCHAR, 307 TokenType.NVARCHAR, 308 TokenType.BPCHAR, 309 TokenType.TEXT, 310 TokenType.MEDIUMTEXT, 311 TokenType.LONGTEXT, 312 TokenType.MEDIUMBLOB, 313 TokenType.LONGBLOB, 314 TokenType.BINARY, 315 TokenType.VARBINARY, 316 TokenType.JSON, 317 TokenType.JSONB, 318 TokenType.INTERVAL, 319 TokenType.TINYBLOB, 320 TokenType.TINYTEXT, 321 TokenType.TIME, 322 TokenType.TIMETZ, 323 TokenType.TIMESTAMP, 324 TokenType.TIMESTAMP_S, 325 TokenType.TIMESTAMP_MS, 326 TokenType.TIMESTAMP_NS, 327 TokenType.TIMESTAMPTZ, 328 TokenType.TIMESTAMPLTZ, 329 TokenType.TIMESTAMPNTZ, 330 TokenType.DATETIME, 331 TokenType.DATETIME64, 332 TokenType.DATE, 333 TokenType.DATE32, 334 TokenType.INT4RANGE, 335 TokenType.INT4MULTIRANGE, 336 TokenType.INT8RANGE, 337 TokenType.INT8MULTIRANGE, 338 TokenType.NUMRANGE, 339 TokenType.NUMMULTIRANGE, 340 TokenType.TSRANGE, 341 TokenType.TSMULTIRANGE, 342 TokenType.TSTZRANGE, 343 TokenType.TSTZMULTIRANGE, 344 TokenType.DATERANGE, 345 TokenType.DATEMULTIRANGE, 346 TokenType.DECIMAL, 347 TokenType.DECIMAL32, 348 TokenType.DECIMAL64, 349 TokenType.DECIMAL128, 350 TokenType.UDECIMAL, 351 TokenType.BIGDECIMAL, 352 TokenType.UUID, 353 TokenType.GEOGRAPHY, 354 TokenType.GEOMETRY, 355 TokenType.HLLSKETCH, 356 TokenType.HSTORE, 357 TokenType.PSEUDO_TYPE, 358 TokenType.SUPER, 359 TokenType.SERIAL, 360 TokenType.SMALLSERIAL, 361 TokenType.BIGSERIAL, 362 TokenType.XML, 363 TokenType.YEAR, 364 TokenType.UNIQUEIDENTIFIER, 365 TokenType.USERDEFINED, 366 TokenType.MONEY, 367 TokenType.SMALLMONEY, 368 TokenType.ROWVERSION, 369 TokenType.IMAGE, 370 TokenType.VARIANT, 371 TokenType.VECTOR, 372 TokenType.OBJECT, 373 TokenType.OBJECT_IDENTIFIER, 374 TokenType.INET, 375 TokenType.IPADDRESS, 376 TokenType.IPPREFIX, 377 TokenType.IPV4, 378 TokenType.IPV6, 379 TokenType.UNKNOWN, 380 TokenType.NULL, 381 TokenType.NAME, 382 TokenType.TDIGEST, 383 *ENUM_TYPE_TOKENS, 384 *NESTED_TYPE_TOKENS, 385 *AGGREGATE_TYPE_TOKENS, 386 } 387 388 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 389 TokenType.BIGINT: TokenType.UBIGINT, 390 TokenType.INT: TokenType.UINT, 391 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 392 TokenType.SMALLINT: TokenType.USMALLINT, 393 TokenType.TINYINT: TokenType.UTINYINT, 394 TokenType.DECIMAL: TokenType.UDECIMAL, 395 } 
396 397 SUBQUERY_PREDICATES = { 398 TokenType.ANY: exp.Any, 399 TokenType.ALL: exp.All, 400 TokenType.EXISTS: exp.Exists, 401 TokenType.SOME: exp.Any, 402 } 403 404 RESERVED_TOKENS = { 405 *Tokenizer.SINGLE_TOKENS.values(), 406 TokenType.SELECT, 407 } - {TokenType.IDENTIFIER} 408 409 DB_CREATABLES = { 410 TokenType.DATABASE, 411 TokenType.DICTIONARY, 412 TokenType.MODEL, 413 TokenType.SCHEMA, 414 TokenType.SEQUENCE, 415 TokenType.STORAGE_INTEGRATION, 416 TokenType.TABLE, 417 TokenType.TAG, 418 TokenType.VIEW, 419 TokenType.WAREHOUSE, 420 TokenType.STREAMLIT, 421 } 422 423 CREATABLES = { 424 TokenType.COLUMN, 425 TokenType.CONSTRAINT, 426 TokenType.FOREIGN_KEY, 427 TokenType.FUNCTION, 428 TokenType.INDEX, 429 TokenType.PROCEDURE, 430 *DB_CREATABLES, 431 } 432 433 ALTERABLES = { 434 TokenType.TABLE, 435 TokenType.VIEW, 436 } 437 438 # Tokens that can represent identifiers 439 ID_VAR_TOKENS = { 440 TokenType.ALL, 441 TokenType.VAR, 442 TokenType.ANTI, 443 TokenType.APPLY, 444 TokenType.ASC, 445 TokenType.ASOF, 446 TokenType.AUTO_INCREMENT, 447 TokenType.BEGIN, 448 TokenType.BPCHAR, 449 TokenType.CACHE, 450 TokenType.CASE, 451 TokenType.COLLATE, 452 TokenType.COMMAND, 453 TokenType.COMMENT, 454 TokenType.COMMIT, 455 TokenType.CONSTRAINT, 456 TokenType.COPY, 457 TokenType.CUBE, 458 TokenType.DEFAULT, 459 TokenType.DELETE, 460 TokenType.DESC, 461 TokenType.DESCRIBE, 462 TokenType.DICTIONARY, 463 TokenType.DIV, 464 TokenType.END, 465 TokenType.EXECUTE, 466 TokenType.ESCAPE, 467 TokenType.FALSE, 468 TokenType.FIRST, 469 TokenType.FILTER, 470 TokenType.FINAL, 471 TokenType.FORMAT, 472 TokenType.FULL, 473 TokenType.IDENTIFIER, 474 TokenType.IS, 475 TokenType.ISNULL, 476 TokenType.INTERVAL, 477 TokenType.KEEP, 478 TokenType.KILL, 479 TokenType.LEFT, 480 TokenType.LOAD, 481 TokenType.MERGE, 482 TokenType.NATURAL, 483 TokenType.NEXT, 484 TokenType.OFFSET, 485 TokenType.OPERATOR, 486 TokenType.ORDINALITY, 487 TokenType.OVERLAPS, 488 TokenType.OVERWRITE, 489 TokenType.PARTITION, 490 TokenType.PERCENT, 491 TokenType.PIVOT, 492 TokenType.PRAGMA, 493 TokenType.RANGE, 494 TokenType.RECURSIVE, 495 TokenType.REFERENCES, 496 TokenType.REFRESH, 497 TokenType.RENAME, 498 TokenType.REPLACE, 499 TokenType.RIGHT, 500 TokenType.ROLLUP, 501 TokenType.ROW, 502 TokenType.ROWS, 503 TokenType.SEMI, 504 TokenType.SET, 505 TokenType.SETTINGS, 506 TokenType.SHOW, 507 TokenType.TEMPORARY, 508 TokenType.TOP, 509 TokenType.TRUE, 510 TokenType.TRUNCATE, 511 TokenType.UNIQUE, 512 TokenType.UNNEST, 513 TokenType.UNPIVOT, 514 TokenType.UPDATE, 515 TokenType.USE, 516 TokenType.VOLATILE, 517 TokenType.WINDOW, 518 *CREATABLES, 519 *SUBQUERY_PREDICATES, 520 *TYPE_TOKENS, 521 *NO_PAREN_FUNCTIONS, 522 } 523 524 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 525 526 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 527 TokenType.ANTI, 528 TokenType.APPLY, 529 TokenType.ASOF, 530 TokenType.FULL, 531 TokenType.LEFT, 532 TokenType.LOCK, 533 TokenType.NATURAL, 534 TokenType.OFFSET, 535 TokenType.RIGHT, 536 TokenType.SEMI, 537 TokenType.WINDOW, 538 } 539 540 ALIAS_TOKENS = ID_VAR_TOKENS 541 542 ARRAY_CONSTRUCTORS = { 543 "ARRAY": exp.Array, 544 "LIST": exp.List, 545 } 546 547 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 548 549 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 550 551 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 552 553 FUNC_TOKENS = { 554 TokenType.COLLATE, 555 TokenType.COMMAND, 556 TokenType.CURRENT_DATE, 557 TokenType.CURRENT_DATETIME, 558 TokenType.CURRENT_TIMESTAMP, 559 TokenType.CURRENT_TIME, 560 
TokenType.CURRENT_USER, 561 TokenType.FILTER, 562 TokenType.FIRST, 563 TokenType.FORMAT, 564 TokenType.GLOB, 565 TokenType.IDENTIFIER, 566 TokenType.INDEX, 567 TokenType.ISNULL, 568 TokenType.ILIKE, 569 TokenType.INSERT, 570 TokenType.LIKE, 571 TokenType.MERGE, 572 TokenType.OFFSET, 573 TokenType.PRIMARY_KEY, 574 TokenType.RANGE, 575 TokenType.REPLACE, 576 TokenType.RLIKE, 577 TokenType.ROW, 578 TokenType.UNNEST, 579 TokenType.VAR, 580 TokenType.LEFT, 581 TokenType.RIGHT, 582 TokenType.SEQUENCE, 583 TokenType.DATE, 584 TokenType.DATETIME, 585 TokenType.TABLE, 586 TokenType.TIMESTAMP, 587 TokenType.TIMESTAMPTZ, 588 TokenType.TRUNCATE, 589 TokenType.WINDOW, 590 TokenType.XOR, 591 *TYPE_TOKENS, 592 *SUBQUERY_PREDICATES, 593 } 594 595 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 596 TokenType.AND: exp.And, 597 } 598 599 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 600 TokenType.COLON_EQ: exp.PropertyEQ, 601 } 602 603 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 604 TokenType.OR: exp.Or, 605 } 606 607 EQUALITY = { 608 TokenType.EQ: exp.EQ, 609 TokenType.NEQ: exp.NEQ, 610 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 611 } 612 613 COMPARISON = { 614 TokenType.GT: exp.GT, 615 TokenType.GTE: exp.GTE, 616 TokenType.LT: exp.LT, 617 TokenType.LTE: exp.LTE, 618 } 619 620 BITWISE = { 621 TokenType.AMP: exp.BitwiseAnd, 622 TokenType.CARET: exp.BitwiseXor, 623 TokenType.PIPE: exp.BitwiseOr, 624 } 625 626 TERM = { 627 TokenType.DASH: exp.Sub, 628 TokenType.PLUS: exp.Add, 629 TokenType.MOD: exp.Mod, 630 TokenType.COLLATE: exp.Collate, 631 } 632 633 FACTOR = { 634 TokenType.DIV: exp.IntDiv, 635 TokenType.LR_ARROW: exp.Distance, 636 TokenType.SLASH: exp.Div, 637 TokenType.STAR: exp.Mul, 638 } 639 640 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 641 642 TIMES = { 643 TokenType.TIME, 644 TokenType.TIMETZ, 645 } 646 647 TIMESTAMPS = { 648 TokenType.TIMESTAMP, 649 TokenType.TIMESTAMPTZ, 650 TokenType.TIMESTAMPLTZ, 651 *TIMES, 652 } 653 654 SET_OPERATIONS = { 655 TokenType.UNION, 656 TokenType.INTERSECT, 657 TokenType.EXCEPT, 658 } 659 660 JOIN_METHODS = { 661 TokenType.ASOF, 662 TokenType.NATURAL, 663 TokenType.POSITIONAL, 664 } 665 666 JOIN_SIDES = { 667 TokenType.LEFT, 668 TokenType.RIGHT, 669 TokenType.FULL, 670 } 671 672 JOIN_KINDS = { 673 TokenType.ANTI, 674 TokenType.CROSS, 675 TokenType.INNER, 676 TokenType.OUTER, 677 TokenType.SEMI, 678 TokenType.STRAIGHT_JOIN, 679 } 680 681 JOIN_HINTS: t.Set[str] = set() 682 683 LAMBDAS = { 684 TokenType.ARROW: lambda self, expressions: self.expression( 685 exp.Lambda, 686 this=self._replace_lambda( 687 self._parse_assignment(), 688 expressions, 689 ), 690 expressions=expressions, 691 ), 692 TokenType.FARROW: lambda self, expressions: self.expression( 693 exp.Kwarg, 694 this=exp.var(expressions[0].name), 695 expression=self._parse_assignment(), 696 ), 697 } 698 699 COLUMN_OPERATORS = { 700 TokenType.DOT: None, 701 TokenType.DCOLON: lambda self, this, to: self.expression( 702 exp.Cast if self.STRICT_CAST else exp.TryCast, 703 this=this, 704 to=to, 705 ), 706 TokenType.ARROW: lambda self, this, path: self.expression( 707 exp.JSONExtract, 708 this=this, 709 expression=self.dialect.to_json_path(path), 710 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 711 ), 712 TokenType.DARROW: lambda self, this, path: self.expression( 713 exp.JSONExtractScalar, 714 this=this, 715 expression=self.dialect.to_json_path(path), 716 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 717 ), 718 TokenType.HASH_ARROW: lambda self, this, path: 
self.expression( 719 exp.JSONBExtract, 720 this=this, 721 expression=path, 722 ), 723 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 724 exp.JSONBExtractScalar, 725 this=this, 726 expression=path, 727 ), 728 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 729 exp.JSONBContains, 730 this=this, 731 expression=key, 732 ), 733 } 734 735 EXPRESSION_PARSERS = { 736 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 737 exp.Column: lambda self: self._parse_column(), 738 exp.Condition: lambda self: self._parse_assignment(), 739 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 740 exp.Expression: lambda self: self._parse_expression(), 741 exp.From: lambda self: self._parse_from(joins=True), 742 exp.Group: lambda self: self._parse_group(), 743 exp.Having: lambda self: self._parse_having(), 744 exp.Identifier: lambda self: self._parse_id_var(), 745 exp.Join: lambda self: self._parse_join(), 746 exp.Lambda: lambda self: self._parse_lambda(), 747 exp.Lateral: lambda self: self._parse_lateral(), 748 exp.Limit: lambda self: self._parse_limit(), 749 exp.Offset: lambda self: self._parse_offset(), 750 exp.Order: lambda self: self._parse_order(), 751 exp.Ordered: lambda self: self._parse_ordered(), 752 exp.Properties: lambda self: self._parse_properties(), 753 exp.Qualify: lambda self: self._parse_qualify(), 754 exp.Returning: lambda self: self._parse_returning(), 755 exp.Select: lambda self: self._parse_select(), 756 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 757 exp.Table: lambda self: self._parse_table_parts(), 758 exp.TableAlias: lambda self: self._parse_table_alias(), 759 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 760 exp.Where: lambda self: self._parse_where(), 761 exp.Window: lambda self: self._parse_named_window(), 762 exp.With: lambda self: self._parse_with(), 763 "JOIN_TYPE": lambda self: self._parse_join_parts(), 764 } 765 766 STATEMENT_PARSERS = { 767 TokenType.ALTER: lambda self: self._parse_alter(), 768 TokenType.BEGIN: lambda self: self._parse_transaction(), 769 TokenType.CACHE: lambda self: self._parse_cache(), 770 TokenType.COMMENT: lambda self: self._parse_comment(), 771 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 772 TokenType.COPY: lambda self: self._parse_copy(), 773 TokenType.CREATE: lambda self: self._parse_create(), 774 TokenType.DELETE: lambda self: self._parse_delete(), 775 TokenType.DESC: lambda self: self._parse_describe(), 776 TokenType.DESCRIBE: lambda self: self._parse_describe(), 777 TokenType.DROP: lambda self: self._parse_drop(), 778 TokenType.INSERT: lambda self: self._parse_insert(), 779 TokenType.KILL: lambda self: self._parse_kill(), 780 TokenType.LOAD: lambda self: self._parse_load(), 781 TokenType.MERGE: lambda self: self._parse_merge(), 782 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 783 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 784 TokenType.REFRESH: lambda self: self._parse_refresh(), 785 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 786 TokenType.SET: lambda self: self._parse_set(), 787 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 788 TokenType.UNCACHE: lambda self: self._parse_uncache(), 789 TokenType.UPDATE: lambda self: self._parse_update(), 790 TokenType.USE: lambda self: self.expression( 791 exp.Use, 792 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 793 
this=self._parse_table(schema=False), 794 ), 795 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 796 } 797 798 UNARY_PARSERS = { 799 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 800 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 801 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 802 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 803 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 804 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 805 } 806 807 STRING_PARSERS = { 808 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 809 exp.RawString, this=token.text 810 ), 811 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 812 exp.National, this=token.text 813 ), 814 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 815 TokenType.STRING: lambda self, token: self.expression( 816 exp.Literal, this=token.text, is_string=True 817 ), 818 TokenType.UNICODE_STRING: lambda self, token: self.expression( 819 exp.UnicodeString, 820 this=token.text, 821 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 822 ), 823 } 824 825 NUMERIC_PARSERS = { 826 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 827 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 828 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 829 TokenType.NUMBER: lambda self, token: self.expression( 830 exp.Literal, this=token.text, is_string=False 831 ), 832 } 833 834 PRIMARY_PARSERS = { 835 **STRING_PARSERS, 836 **NUMERIC_PARSERS, 837 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 838 TokenType.NULL: lambda self, _: self.expression(exp.Null), 839 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 840 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 841 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 842 TokenType.STAR: lambda self, _: self.expression( 843 exp.Star, 844 **{ 845 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 846 "replace": self._parse_star_op("REPLACE"), 847 "rename": self._parse_star_op("RENAME"), 848 }, 849 ), 850 } 851 852 PLACEHOLDER_PARSERS = { 853 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 854 TokenType.PARAMETER: lambda self: self._parse_parameter(), 855 TokenType.COLON: lambda self: ( 856 self.expression(exp.Placeholder, this=self._prev.text) 857 if self._match_set(self.ID_VAR_TOKENS) 858 else None 859 ), 860 } 861 862 RANGE_PARSERS = { 863 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 864 TokenType.GLOB: binary_range_parser(exp.Glob), 865 TokenType.ILIKE: binary_range_parser(exp.ILike), 866 TokenType.IN: lambda self, this: self._parse_in(this), 867 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 868 TokenType.IS: lambda self, this: self._parse_is(this), 869 TokenType.LIKE: binary_range_parser(exp.Like), 870 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 871 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 872 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 873 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 874 } 875 876 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 877 
"ALLOWED_VALUES": lambda self: self.expression( 878 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 879 ), 880 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 881 "AUTO": lambda self: self._parse_auto_property(), 882 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 883 "BACKUP": lambda self: self.expression( 884 exp.BackupProperty, this=self._parse_var(any_token=True) 885 ), 886 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 887 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 888 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 889 "CHECKSUM": lambda self: self._parse_checksum(), 890 "CLUSTER BY": lambda self: self._parse_cluster(), 891 "CLUSTERED": lambda self: self._parse_clustered_by(), 892 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 893 exp.CollateProperty, **kwargs 894 ), 895 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 896 "CONTAINS": lambda self: self._parse_contains_property(), 897 "COPY": lambda self: self._parse_copy_property(), 898 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 899 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 900 "DEFINER": lambda self: self._parse_definer(), 901 "DETERMINISTIC": lambda self: self.expression( 902 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 903 ), 904 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 905 "DUPLICATE": lambda self: self._parse_duplicate(), 906 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 907 "DISTKEY": lambda self: self._parse_distkey(), 908 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 909 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 910 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 911 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 912 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 913 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 914 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 915 "FREESPACE": lambda self: self._parse_freespace(), 916 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 917 "HEAP": lambda self: self.expression(exp.HeapProperty), 918 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 919 "IMMUTABLE": lambda self: self.expression( 920 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 921 ), 922 "INHERITS": lambda self: self.expression( 923 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 924 ), 925 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 926 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 927 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 928 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 929 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 930 "LIKE": lambda self: self._parse_create_like(), 931 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 932 "LOCK": lambda self: self._parse_locking(), 933 "LOCKING": lambda self: self._parse_locking(), 934 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 935 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 936 
"MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 937 "MODIFIES": lambda self: self._parse_modifies_property(), 938 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 939 "NO": lambda self: self._parse_no_property(), 940 "ON": lambda self: self._parse_on_property(), 941 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 942 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 943 "PARTITION": lambda self: self._parse_partitioned_of(), 944 "PARTITION BY": lambda self: self._parse_partitioned_by(), 945 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 946 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 947 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 948 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 949 "READS": lambda self: self._parse_reads_property(), 950 "REMOTE": lambda self: self._parse_remote_with_connection(), 951 "RETURNS": lambda self: self._parse_returns(), 952 "STRICT": lambda self: self.expression(exp.StrictProperty), 953 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 954 "ROW": lambda self: self._parse_row(), 955 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 956 "SAMPLE": lambda self: self.expression( 957 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 958 ), 959 "SECURE": lambda self: self.expression(exp.SecureProperty), 960 "SECURITY": lambda self: self._parse_security(), 961 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 962 "SETTINGS": lambda self: self._parse_settings_property(), 963 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 964 "SORTKEY": lambda self: self._parse_sortkey(), 965 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 966 "STABLE": lambda self: self.expression( 967 exp.StabilityProperty, this=exp.Literal.string("STABLE") 968 ), 969 "STORED": lambda self: self._parse_stored(), 970 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 971 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 972 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 973 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 974 "TO": lambda self: self._parse_to_table(), 975 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 976 "TRANSFORM": lambda self: self.expression( 977 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 978 ), 979 "TTL": lambda self: self._parse_ttl(), 980 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 981 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 982 "VOLATILE": lambda self: self._parse_volatile_property(), 983 "WITH": lambda self: self._parse_with_property(), 984 } 985 986 CONSTRAINT_PARSERS = { 987 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 988 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 989 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 990 "CHARACTER SET": lambda self: self.expression( 991 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 992 ), 993 "CHECK": lambda self: self.expression( 994 exp.CheckColumnConstraint, 995 this=self._parse_wrapped(self._parse_assignment), 996 enforced=self._match_text_seq("ENFORCED"), 997 ), 998 "COLLATE": lambda self: self.expression( 999 
exp.CollateColumnConstraint, 1000 this=self._parse_identifier() or self._parse_column(), 1001 ), 1002 "COMMENT": lambda self: self.expression( 1003 exp.CommentColumnConstraint, this=self._parse_string() 1004 ), 1005 "COMPRESS": lambda self: self._parse_compress(), 1006 "CLUSTERED": lambda self: self.expression( 1007 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1008 ), 1009 "NONCLUSTERED": lambda self: self.expression( 1010 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1011 ), 1012 "DEFAULT": lambda self: self.expression( 1013 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1014 ), 1015 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1016 "EPHEMERAL": lambda self: self.expression( 1017 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1018 ), 1019 "EXCLUDE": lambda self: self.expression( 1020 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1021 ), 1022 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1023 "FORMAT": lambda self: self.expression( 1024 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1025 ), 1026 "GENERATED": lambda self: self._parse_generated_as_identity(), 1027 "IDENTITY": lambda self: self._parse_auto_increment(), 1028 "INLINE": lambda self: self._parse_inline(), 1029 "LIKE": lambda self: self._parse_create_like(), 1030 "NOT": lambda self: self._parse_not_constraint(), 1031 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1032 "ON": lambda self: ( 1033 self._match(TokenType.UPDATE) 1034 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1035 ) 1036 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1037 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1038 "PERIOD": lambda self: self._parse_period_for_system_time(), 1039 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1040 "REFERENCES": lambda self: self._parse_references(match=False), 1041 "TITLE": lambda self: self.expression( 1042 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1043 ), 1044 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1045 "UNIQUE": lambda self: self._parse_unique(), 1046 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1047 "WITH": lambda self: self.expression( 1048 exp.Properties, expressions=self._parse_wrapped_properties() 1049 ), 1050 } 1051 1052 ALTER_PARSERS = { 1053 "ADD": lambda self: self._parse_alter_table_add(), 1054 "ALTER": lambda self: self._parse_alter_table_alter(), 1055 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1056 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1057 "DROP": lambda self: self._parse_alter_table_drop(), 1058 "RENAME": lambda self: self._parse_alter_table_rename(), 1059 "SET": lambda self: self._parse_alter_table_set(), 1060 "AS": lambda self: self._parse_select(), 1061 } 1062 1063 ALTER_ALTER_PARSERS = { 1064 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1065 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1066 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1067 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1068 } 1069 1070 SCHEMA_UNNAMED_CONSTRAINTS = { 1071 "CHECK", 1072 "EXCLUDE", 1073 "FOREIGN KEY", 1074 "LIKE", 1075 "PERIOD", 1076 "PRIMARY KEY", 1077 "UNIQUE", 1078 } 1079 1080 
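_parse_alter (shown earlier) uppercases the token that follows the table name and dispatches through ALTER_PARSERS; a sketch of the resulting tree:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
assert isinstance(ast, exp.Alter)
print(ast.args["kind"])  # TABLE
print([type(action).__name__ for action in ast.args["actions"]])  # e.g. ['ColumnDef']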
NO_PAREN_FUNCTION_PARSERS = { 1081 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1082 "CASE": lambda self: self._parse_case(), 1083 "CONNECT_BY_ROOT": lambda self: self.expression( 1084 exp.ConnectByRoot, this=self._parse_column() 1085 ), 1086 "IF": lambda self: self._parse_if(), 1087 "NEXT": lambda self: self._parse_next_value_for(), 1088 } 1089 1090 INVALID_FUNC_NAME_TOKENS = { 1091 TokenType.IDENTIFIER, 1092 TokenType.STRING, 1093 } 1094 1095 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1096 1097 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1098 1099 FUNCTION_PARSERS = { 1100 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1101 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1102 "DECODE": lambda self: self._parse_decode(), 1103 "EXTRACT": lambda self: self._parse_extract(), 1104 "GAP_FILL": lambda self: self._parse_gap_fill(), 1105 "JSON_OBJECT": lambda self: self._parse_json_object(), 1106 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1107 "JSON_TABLE": lambda self: self._parse_json_table(), 1108 "MATCH": lambda self: self._parse_match_against(), 1109 "NORMALIZE": lambda self: self._parse_normalize(), 1110 "OPENJSON": lambda self: self._parse_open_json(), 1111 "POSITION": lambda self: self._parse_position(), 1112 "PREDICT": lambda self: self._parse_predict(), 1113 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1114 "STRING_AGG": lambda self: self._parse_string_agg(), 1115 "SUBSTRING": lambda self: self._parse_substring(), 1116 "TRIM": lambda self: self._parse_trim(), 1117 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1118 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1119 } 1120 1121 QUERY_MODIFIER_PARSERS = { 1122 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1123 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1124 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1125 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1126 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1127 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1128 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1129 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1130 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1131 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1132 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1133 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1134 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1135 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1136 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1137 TokenType.CLUSTER_BY: lambda self: ( 1138 "cluster", 1139 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1140 ), 1141 TokenType.DISTRIBUTE_BY: lambda self: ( 1142 "distribute", 1143 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1144 ), 1145 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1146 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1147 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1148 } 1149 1150 SET_PARSERS = { 1151 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1152 "LOCAL": lambda 
self: self._parse_set_item_assignment("LOCAL"), 1153 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1154 "TRANSACTION": lambda self: self._parse_set_transaction(), 1155 } 1156 1157 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1158 1159 TYPE_LITERAL_PARSERS = { 1160 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1161 } 1162 1163 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1164 1165 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1166 1167 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1168 1169 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1170 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1171 "ISOLATION": ( 1172 ("LEVEL", "REPEATABLE", "READ"), 1173 ("LEVEL", "READ", "COMMITTED"), 1174 ("LEVEL", "READ", "UNCOMMITTED"), 1175 ("LEVEL", "SERIALIZABLE"), 1176 ), 1177 "READ": ("WRITE", "ONLY"), 1178 } 1179 1180 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1181 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1182 ) 1183 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1184 1185 CREATE_SEQUENCE: OPTIONS_TYPE = { 1186 "SCALE": ("EXTEND", "NOEXTEND"), 1187 "SHARD": ("EXTEND", "NOEXTEND"), 1188 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1189 **dict.fromkeys( 1190 ( 1191 "SESSION", 1192 "GLOBAL", 1193 "KEEP", 1194 "NOKEEP", 1195 "ORDER", 1196 "NOORDER", 1197 "NOCACHE", 1198 "CYCLE", 1199 "NOCYCLE", 1200 "NOMINVALUE", 1201 "NOMAXVALUE", 1202 "NOSCALE", 1203 "NOSHARD", 1204 ), 1205 tuple(), 1206 ), 1207 } 1208 1209 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1210 1211 USABLES: OPTIONS_TYPE = dict.fromkeys( 1212 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1213 ) 1214 1215 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1216 1217 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1218 "TYPE": ("EVOLUTION",), 1219 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1220 } 1221 1222 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1223 "NOT": ("ENFORCED",), 1224 "MATCH": ( 1225 "FULL", 1226 "PARTIAL", 1227 "SIMPLE", 1228 ), 1229 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1230 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1231 } 1232 1233 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1234 1235 CLONE_KEYWORDS = {"CLONE", "COPY"} 1236 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1237 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1238 1239 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1240 1241 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1242 1243 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1244 1245 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1246 1247 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1248 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1249 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1250 1251 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1252 1253 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1254 1255 ADD_CONSTRAINT_TOKENS = { 1256 TokenType.CONSTRAINT, 1257 TokenType.FOREIGN_KEY, 1258 TokenType.INDEX, 1259 TokenType.KEY, 1260 TokenType.PRIMARY_KEY, 1261 TokenType.UNIQUE, 1262 } 1263 1264 DISTINCT_TOKENS = {TokenType.DISTINCT} 1265 1266 NULL_TOKENS = {TokenType.NULL} 1267 1268
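OPTIONS_TYPE tables such as TRANSACTION_CHARACTERISTICS feed _parse_var_from_options (shown earlier), which folds a keyword plus one of its allowed continuations into a single exp.Var; a sketch:

import sqlglot

ast = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
item = ast.expressions[0]
print(item.args["kind"])         # TRANSACTION
print(item.expressions[0].name)  # ISOLATION LEVEL READ COMMITTED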
UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1269 1270 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1271 1272 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1273 1274 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1275 1276 ODBC_DATETIME_LITERALS = { 1277 "d": exp.Date, 1278 "t": exp.Time, 1279 "ts": exp.Timestamp, 1280 } 1281 1282 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1283 1284 STRICT_CAST = True 1285 1286 PREFIXED_PIVOT_COLUMNS = False 1287 IDENTIFY_PIVOT_STRINGS = False 1288 1289 LOG_DEFAULTS_TO_LN = False 1290 1291 # Whether ADD is present for each column added by ALTER TABLE 1292 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1293 1294 # Whether the table sample clause expects CSV syntax 1295 TABLESAMPLE_CSV = False 1296 1297 # The default method used for table sampling 1298 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1299 1300 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1301 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1302 1303 # Whether the TRIM function expects the characters to trim as its first argument 1304 TRIM_PATTERN_FIRST = False 1305 1306 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1307 STRING_ALIASES = False 1308 1309 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1310 MODIFIERS_ATTACHED_TO_SET_OP = True 1311 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1312 1313 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1314 NO_PAREN_IF_COMMANDS = True 1315 1316 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1317 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1318 1319 # Whether the `:` operator is used to extract a value from a VARIANT column 1320 COLON_IS_VARIANT_EXTRACT = False 1321 1322 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1323 # If this is True and '(' is not found, the keyword will be treated as an identifier 1324 VALUES_FOLLOWED_BY_PAREN = True 1325 1326 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1327 SUPPORTS_IMPLICIT_UNNEST = False 1328 1329 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1330 INTERVAL_SPANS = True 1331 1332 # Whether a PARTITION clause can follow a table reference 1333 SUPPORTS_PARTITION_SELECTION = False 1334 1335 __slots__ = ( 1336 "error_level", 1337 "error_message_context", 1338 "max_errors", 1339 "dialect", 1340 "sql", 1341 "errors", 1342 "_tokens", 1343 "_index", 1344 "_curr", 1345 "_next", 1346 "_prev", 1347 "_prev_comments", 1348 ) 1349 1350 # Autofilled 1351 SHOW_TRIE: t.Dict = {} 1352 SET_TRIE: t.Dict = {} 1353 1354 def __init__( 1355 self, 1356 error_level: t.Optional[ErrorLevel] = None, 1357 error_message_context: int = 100, 1358 max_errors: int = 3, 1359 dialect: DialectType = None, 1360 ): 1361 from sqlglot.dialects import Dialect 1362 1363 self.error_level = error_level or ErrorLevel.IMMEDIATE 1364 self.error_message_context = error_message_context 1365 self.max_errors = max_errors 1366 self.dialect = Dialect.get_or_raise(dialect) 1367 self.reset() 1368 1369 def reset(self): 1370 self.sql = "" 1371 self.errors = [] 1372 self._tokens = [] 1373 self._index = 0 1374 self._curr = None 1375 self._next = None 1376 self._prev = None 1377 self._prev_comments = None 1378 1379 def parse( 1380 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1381 ) -> t.List[t.Optional[exp.Expression]]: 1382 """ 1383 Parses a list of tokens and returns a list of syntax trees, one tree 1384 per parsed SQL statement. 1385 1386 Args: 1387 raw_tokens: The list of tokens. 1388 sql: The original SQL string, used to produce helpful debug messages. 1389 1390 Returns: 1391 The list of the produced syntax trees. 1392 """ 1393 return self._parse( 1394 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1395 ) 1396 1397 def parse_into( 1398 self, 1399 expression_types: exp.IntoType, 1400 raw_tokens: t.List[Token], 1401 sql: t.Optional[str] = None, 1402 ) -> t.List[t.Optional[exp.Expression]]: 1403 """ 1404 Parses a list of tokens into a given Expression type. If a collection of Expression 1405 types is given instead, this method will try to parse the token list into each one 1406 of them, stopping at the first for which the parsing succeeds. 1407 1408 Args: 1409 expression_types: The expression type(s) to try and parse the token list into. 1410 raw_tokens: The list of tokens. 1411 sql: The original SQL string, used to produce helpful debug messages. 1412 1413 Returns: 1414 The target Expression. 
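Example (an illustrative sketch added editorially, not part of the original docstring; assumes the default dialect):
tokens = Tokenizer().tokenize("SELECT 1")
select = Parser().parse_into(exp.Select, tokens, sql="SELECT 1")[0]
If none of the given expression types can be parsed, a ParseError that aggregates the per-type errors is raised.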
1415 """ 1416 errors = [] 1417 for expression_type in ensure_list(expression_types): 1418 parser = self.EXPRESSION_PARSERS.get(expression_type) 1419 if not parser: 1420 raise TypeError(f"No parser registered for {expression_type}") 1421 1422 try: 1423 return self._parse(parser, raw_tokens, sql) 1424 except ParseError as e: 1425 e.errors[0]["into_expression"] = expression_type 1426 errors.append(e) 1427 1428 raise ParseError( 1429 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1430 errors=merge_errors(errors), 1431 ) from errors[-1] 1432 1433 def _parse( 1434 self, 1435 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1436 raw_tokens: t.List[Token], 1437 sql: t.Optional[str] = None, 1438 ) -> t.List[t.Optional[exp.Expression]]: 1439 self.reset() 1440 self.sql = sql or "" 1441 1442 total = len(raw_tokens) 1443 chunks: t.List[t.List[Token]] = [[]] 1444 1445 for i, token in enumerate(raw_tokens): 1446 if token.token_type == TokenType.SEMICOLON: 1447 if token.comments: 1448 chunks.append([token]) 1449 1450 if i < total - 1: 1451 chunks.append([]) 1452 else: 1453 chunks[-1].append(token) 1454 1455 expressions = [] 1456 1457 for tokens in chunks: 1458 self._index = -1 1459 self._tokens = tokens 1460 self._advance() 1461 1462 expressions.append(parse_method(self)) 1463 1464 if self._index < len(self._tokens): 1465 self.raise_error("Invalid expression / Unexpected token") 1466 1467 self.check_errors() 1468 1469 return expressions 1470 1471 def check_errors(self) -> None: 1472 """Logs or raises any found errors, depending on the chosen error level setting.""" 1473 if self.error_level == ErrorLevel.WARN: 1474 for error in self.errors: 1475 logger.error(str(error)) 1476 elif self.error_level == ErrorLevel.RAISE and self.errors: 1477 raise ParseError( 1478 concat_messages(self.errors, self.max_errors), 1479 errors=merge_errors(self.errors), 1480 ) 1481 1482 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1483 """ 1484 Appends an error in the list of recorded errors or raises it, depending on the chosen 1485 error level setting. 1486 """ 1487 token = token or self._curr or self._prev or Token.string("") 1488 start = token.start 1489 end = token.end + 1 1490 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1491 highlight = self.sql[start:end] 1492 end_context = self.sql[end : end + self.error_message_context] 1493 1494 error = ParseError.new( 1495 f"{message}. Line {token.line}, Col: {token.col}.\n" 1496 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1497 description=message, 1498 line=token.line, 1499 col=token.col, 1500 start_context=start_context, 1501 highlight=highlight, 1502 end_context=end_context, 1503 ) 1504 1505 if self.error_level == ErrorLevel.IMMEDIATE: 1506 raise error 1507 1508 self.errors.append(error) 1509 1510 def expression( 1511 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1512 ) -> E: 1513 """ 1514 Creates a new, validated Expression. 1515 1516 Args: 1517 exp_class: The expression class to instantiate. 1518 comments: An optional list of comments to attach to the expression. 1519 kwargs: The arguments to set for the expression along with their respective values. 1520 1521 Returns: 1522 The target expression. 
1523 """ 1524 instance = exp_class(**kwargs) 1525 instance.add_comments(comments) if comments else self._add_comments(instance) 1526 return self.validate_expression(instance) 1527 1528 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1529 if expression and self._prev_comments: 1530 expression.add_comments(self._prev_comments) 1531 self._prev_comments = None 1532 1533 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1534 """ 1535 Validates an Expression, making sure that all its mandatory arguments are set. 1536 1537 Args: 1538 expression: The expression to validate. 1539 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1540 1541 Returns: 1542 The validated expression. 1543 """ 1544 if self.error_level != ErrorLevel.IGNORE: 1545 for error_message in expression.error_messages(args): 1546 self.raise_error(error_message) 1547 1548 return expression 1549 1550 def _find_sql(self, start: Token, end: Token) -> str: 1551 return self.sql[start.start : end.end + 1] 1552 1553 def _is_connected(self) -> bool: 1554 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1555 1556 def _advance(self, times: int = 1) -> None: 1557 self._index += times 1558 self._curr = seq_get(self._tokens, self._index) 1559 self._next = seq_get(self._tokens, self._index + 1) 1560 1561 if self._index > 0: 1562 self._prev = self._tokens[self._index - 1] 1563 self._prev_comments = self._prev.comments 1564 else: 1565 self._prev = None 1566 self._prev_comments = None 1567 1568 def _retreat(self, index: int) -> None: 1569 if index != self._index: 1570 self._advance(index - self._index) 1571 1572 def _warn_unsupported(self) -> None: 1573 if len(self._tokens) <= 1: 1574 return 1575 1576 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1577 # interested in emitting a warning for the one being currently processed. 1578 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1579 1580 logger.warning( 1581 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1582 ) 1583 1584 def _parse_command(self) -> exp.Command: 1585 self._warn_unsupported() 1586 return self.expression( 1587 exp.Command, 1588 comments=self._prev_comments, 1589 this=self._prev.text.upper(), 1590 expression=self._parse_string(), 1591 ) 1592 1593 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1594 """ 1595 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1597 solve this by setting & resetting the parser state accordingly 1598 """ 1599 index = self._index 1600 error_level = self.error_level 1601 1602 self.error_level = ErrorLevel.IMMEDIATE 1603 try: 1604 this = parse_method() 1605 except ParseError: 1606 this = None 1607 finally: 1608 if not this or retreat: 1609 self._retreat(index) 1610 self.error_level = error_level 1611 1612 return this 1613 1614 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1615 start = self._prev 1616 exists = self._parse_exists() if allow_exists else None 1617 1618 self._match(TokenType.ON) 1619 1620 materialized = self._match_text_seq("MATERIALIZED") 1621 kind = self._match_set(self.CREATABLES) and self._prev 1622 if not kind: 1623 return self._parse_as_command(start) 1624 1625 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1626 this = self._parse_user_defined_function(kind=kind.token_type) 1627 elif kind.token_type == TokenType.TABLE: 1628 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1629 elif kind.token_type == TokenType.COLUMN: 1630 this = self._parse_column() 1631 else: 1632 this = self._parse_id_var() 1633 1634 self._match(TokenType.IS) 1635 1636 return self.expression( 1637 exp.Comment, 1638 this=this, 1639 kind=kind.text, 1640 expression=self._parse_string(), 1641 exists=exists, 1642 materialized=materialized, 1643 ) 1644 1645 def _parse_to_table( 1646 self, 1647 ) -> exp.ToTableProperty: 1648 table = self._parse_table_parts(schema=True) 1649 return self.expression(exp.ToTableProperty, this=table) 1650 1651 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1652 def _parse_ttl(self) -> exp.Expression: 1653 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1654 this = self._parse_bitwise() 1655 1656 if self._match_text_seq("DELETE"): 1657 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1658 if self._match_text_seq("RECOMPRESS"): 1659 return self.expression( 1660 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1661 ) 1662 if self._match_text_seq("TO", "DISK"): 1663 return self.expression( 1664 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1665 ) 1666 if self._match_text_seq("TO", "VOLUME"): 1667 return self.expression( 1668 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1669 ) 1670 1671 return this 1672 1673 expressions = self._parse_csv(_parse_ttl_action) 1674 where = self._parse_where() 1675 group = self._parse_group() 1676 1677 aggregates = None 1678 if group and self._match(TokenType.SET): 1679 aggregates = self._parse_csv(self._parse_set_item) 1680 1681 return self.expression( 1682 exp.MergeTreeTTL, 1683 expressions=expressions, 1684 where=where, 1685 group=group, 1686 aggregates=aggregates, 1687 ) 1688 1689 def _parse_statement(self) -> t.Optional[exp.Expression]: 1690 if self._curr is None: 1691 return None 1692 1693 if self._match_set(self.STATEMENT_PARSERS): 1694 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1695 1696 if self._match_set(self.dialect.tokenizer.COMMANDS): 1697 return self._parse_command() 1698 1699 expression = self._parse_expression() 1700 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1701 return self._parse_query_modifiers(expression) 1702 1703 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1704 start =
self._prev 1705 temporary = self._match(TokenType.TEMPORARY) 1706 materialized = self._match_text_seq("MATERIALIZED") 1707 1708 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1709 if not kind: 1710 return self._parse_as_command(start) 1711 1712 concurrently = self._match_text_seq("CONCURRENTLY") 1713 if_exists = exists or self._parse_exists() 1714 table = self._parse_table_parts( 1715 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1716 ) 1717 1718 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1719 1720 if self._match(TokenType.L_PAREN, advance=False): 1721 expressions = self._parse_wrapped_csv(self._parse_types) 1722 else: 1723 expressions = None 1724 1725 return self.expression( 1726 exp.Drop, 1727 comments=start.comments, 1728 exists=if_exists, 1729 this=table, 1730 expressions=expressions, 1731 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1732 temporary=temporary, 1733 materialized=materialized, 1734 cascade=self._match_text_seq("CASCADE"), 1735 constraints=self._match_text_seq("CONSTRAINTS"), 1736 purge=self._match_text_seq("PURGE"), 1737 cluster=cluster, 1738 concurrently=concurrently, 1739 ) 1740 1741 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1742 return ( 1743 self._match_text_seq("IF") 1744 and (not not_ or self._match(TokenType.NOT)) 1745 and self._match(TokenType.EXISTS) 1746 ) 1747 1748 def _parse_create(self) -> exp.Create | exp.Command: 1749 # Note: this can't be None because we've matched a statement parser 1750 start = self._prev 1751 comments = self._prev_comments 1752 1753 replace = ( 1754 start.token_type == TokenType.REPLACE 1755 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1756 or self._match_pair(TokenType.OR, TokenType.ALTER) 1757 ) 1758 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1759 1760 unique = self._match(TokenType.UNIQUE) 1761 1762 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1763 clustered = True 1764 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1765 "COLUMNSTORE" 1766 ): 1767 clustered = False 1768 else: 1769 clustered = None 1770 1771 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1772 self._advance() 1773 1774 properties = None 1775 create_token = self._match_set(self.CREATABLES) and self._prev 1776 1777 if not create_token: 1778 # exp.Properties.Location.POST_CREATE 1779 properties = self._parse_properties() 1780 create_token = self._match_set(self.CREATABLES) and self._prev 1781 1782 if not properties or not create_token: 1783 return self._parse_as_command(start) 1784 1785 concurrently = self._match_text_seq("CONCURRENTLY") 1786 exists = self._parse_exists(not_=True) 1787 this = None 1788 expression: t.Optional[exp.Expression] = None 1789 indexes = None 1790 no_schema_binding = None 1791 begin = None 1792 end = None 1793 clone = None 1794 1795 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1796 nonlocal properties 1797 if properties and temp_props: 1798 properties.expressions.extend(temp_props.expressions) 1799 elif temp_props: 1800 properties = temp_props 1801 1802 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1803 this = self._parse_user_defined_function(kind=create_token.token_type) 1804 1805 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1806 extend_props(self._parse_properties()) 1807 1808 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1809 
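# Editorial note (illustrative sketch, not in the original source): for a Postgres-style
# definition such as CREATE FUNCTION f() RETURNS INT AS $$ SELECT 1 $$ LANGUAGE SQL,
# the $$-quoted body is consumed as a heredoc above, and trailing clauses such as
# LANGUAGE SQL are collected by the property parsing that follows.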
extend_props(self._parse_properties()) 1810 1811 if not expression: 1812 if self._match(TokenType.COMMAND): 1813 expression = self._parse_as_command(self._prev) 1814 else: 1815 begin = self._match(TokenType.BEGIN) 1816 return_ = self._match_text_seq("RETURN") 1817 1818 if self._match(TokenType.STRING, advance=False): 1819 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1820 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1821 expression = self._parse_string() 1822 extend_props(self._parse_properties()) 1823 else: 1824 expression = self._parse_statement() 1825 1826 end = self._match_text_seq("END") 1827 1828 if return_: 1829 expression = self.expression(exp.Return, this=expression) 1830 elif create_token.token_type == TokenType.INDEX: 1831 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1832 if not self._match(TokenType.ON): 1833 index = self._parse_id_var() 1834 anonymous = False 1835 else: 1836 index = None 1837 anonymous = True 1838 1839 this = self._parse_index(index=index, anonymous=anonymous) 1840 elif create_token.token_type in self.DB_CREATABLES: 1841 table_parts = self._parse_table_parts( 1842 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1843 ) 1844 1845 # exp.Properties.Location.POST_NAME 1846 self._match(TokenType.COMMA) 1847 extend_props(self._parse_properties(before=True)) 1848 1849 this = self._parse_schema(this=table_parts) 1850 1851 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1852 extend_props(self._parse_properties()) 1853 1854 self._match(TokenType.ALIAS) 1855 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1856 # exp.Properties.Location.POST_ALIAS 1857 extend_props(self._parse_properties()) 1858 1859 if create_token.token_type == TokenType.SEQUENCE: 1860 expression = self._parse_types() 1861 extend_props(self._parse_properties()) 1862 else: 1863 expression = self._parse_ddl_select() 1864 1865 if create_token.token_type == TokenType.TABLE: 1866 # exp.Properties.Location.POST_EXPRESSION 1867 extend_props(self._parse_properties()) 1868 1869 indexes = [] 1870 while True: 1871 index = self._parse_index() 1872 1873 # exp.Properties.Location.POST_INDEX 1874 extend_props(self._parse_properties()) 1875 if not index: 1876 break 1877 else: 1878 self._match(TokenType.COMMA) 1879 indexes.append(index) 1880 elif create_token.token_type == TokenType.VIEW: 1881 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1882 no_schema_binding = True 1883 1884 shallow = self._match_text_seq("SHALLOW") 1885 1886 if self._match_texts(self.CLONE_KEYWORDS): 1887 copy = self._prev.text.lower() == "copy" 1888 clone = self.expression( 1889 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1890 ) 1891 1892 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1893 return self._parse_as_command(start) 1894 1895 create_kind_text = create_token.text.upper() 1896 return self.expression( 1897 exp.Create, 1898 comments=comments, 1899 this=this, 1900 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1901 replace=replace, 1902 refresh=refresh, 1903 unique=unique, 1904 expression=expression, 1905 exists=exists, 1906 properties=properties, 1907 indexes=indexes, 1908 no_schema_binding=no_schema_binding, 1909 begin=begin, 1910 end=end, 1911 clone=clone, 1912 concurrently=concurrently, 1913 clustered=clustered, 1914 ) 1915 1916 def
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1917 seq = exp.SequenceProperties() 1918 1919 options = [] 1920 index = self._index 1921 1922 while self._curr: 1923 self._match(TokenType.COMMA) 1924 if self._match_text_seq("INCREMENT"): 1925 self._match_text_seq("BY") 1926 self._match_text_seq("=") 1927 seq.set("increment", self._parse_term()) 1928 elif self._match_text_seq("MINVALUE"): 1929 seq.set("minvalue", self._parse_term()) 1930 elif self._match_text_seq("MAXVALUE"): 1931 seq.set("maxvalue", self._parse_term()) 1932 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1933 self._match_text_seq("=") 1934 seq.set("start", self._parse_term()) 1935 elif self._match_text_seq("CACHE"): 1936 # T-SQL allows empty CACHE which is initialized dynamically 1937 seq.set("cache", self._parse_number() or True) 1938 elif self._match_text_seq("OWNED", "BY"): 1939 # "OWNED BY NONE" is the default 1940 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1941 else: 1942 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1943 if opt: 1944 options.append(opt) 1945 else: 1946 break 1947 1948 seq.set("options", options if options else None) 1949 return None if self._index == index else seq 1950 1951 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1952 # only used for teradata currently 1953 self._match(TokenType.COMMA) 1954 1955 kwargs = { 1956 "no": self._match_text_seq("NO"), 1957 "dual": self._match_text_seq("DUAL"), 1958 "before": self._match_text_seq("BEFORE"), 1959 "default": self._match_text_seq("DEFAULT"), 1960 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1961 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1962 "after": self._match_text_seq("AFTER"), 1963 "minimum": self._match_texts(("MIN", "MINIMUM")), 1964 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1965 } 1966 1967 if self._match_texts(self.PROPERTY_PARSERS): 1968 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1969 try: 1970 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1971 except TypeError: 1972 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1973 1974 return None 1975 1976 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1977 return self._parse_wrapped_csv(self._parse_property) 1978 1979 def _parse_property(self) -> t.Optional[exp.Expression]: 1980 if self._match_texts(self.PROPERTY_PARSERS): 1981 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1982 1983 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1984 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1985 1986 if self._match_text_seq("COMPOUND", "SORTKEY"): 1987 return self._parse_sortkey(compound=True) 1988 1989 if self._match_text_seq("SQL", "SECURITY"): 1990 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1991 1992 index = self._index 1993 key = self._parse_column() 1994 1995 if not self._match(TokenType.EQ): 1996 self._retreat(index) 1997 return self._parse_sequence_properties() 1998 1999 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2000 if isinstance(key, exp.Column): 2001 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2002 2003 value = self._parse_bitwise() or self._parse_var(any_token=True) 2004 2005 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2006 if 
isinstance(value, exp.Column): 2007 value = exp.var(value.name) 2008 2009 return self.expression(exp.Property, this=key, value=value) 2010 2011 def _parse_stored(self) -> exp.FileFormatProperty: 2012 self._match(TokenType.ALIAS) 2013 2014 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2015 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2016 2017 return self.expression( 2018 exp.FileFormatProperty, 2019 this=( 2020 self.expression( 2021 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2022 ) 2023 if input_format or output_format 2024 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2025 ), 2026 ) 2027 2028 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2029 field = self._parse_field() 2030 if isinstance(field, exp.Identifier) and not field.quoted: 2031 field = exp.var(field) 2032 2033 return field 2034 2035 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2036 self._match(TokenType.EQ) 2037 self._match(TokenType.ALIAS) 2038 2039 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2040 2041 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2042 properties = [] 2043 while True: 2044 if before: 2045 prop = self._parse_property_before() 2046 else: 2047 prop = self._parse_property() 2048 if not prop: 2049 break 2050 for p in ensure_list(prop): 2051 properties.append(p) 2052 2053 if properties: 2054 return self.expression(exp.Properties, expressions=properties) 2055 2056 return None 2057 2058 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2059 return self.expression( 2060 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2061 ) 2062 2063 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2064 if self._match_texts(("DEFINER", "INVOKER")): 2065 security_specifier = self._prev.text.upper() 2066 return self.expression(exp.SecurityProperty, this=security_specifier) 2067 return None 2068 2069 def _parse_settings_property(self) -> exp.SettingsProperty: 2070 return self.expression( 2071 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2072 ) 2073 2074 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2075 if self._index >= 2: 2076 pre_volatile_token = self._tokens[self._index - 2] 2077 else: 2078 pre_volatile_token = None 2079 2080 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2081 return exp.VolatileProperty() 2082 2083 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2084 2085 def _parse_retention_period(self) -> exp.Var: 2086 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2087 number = self._parse_number() 2088 number_str = f"{number} " if number else "" 2089 unit = self._parse_var(any_token=True) 2090 return exp.var(f"{number_str}{unit}") 2091 2092 def _parse_system_versioning_property( 2093 self, with_: bool = False 2094 ) -> exp.WithSystemVersioningProperty: 2095 self._match(TokenType.EQ) 2096 prop = self.expression( 2097 exp.WithSystemVersioningProperty, 2098 **{ # type: ignore 2099 "on": True, 2100 "with": with_, 2101 }, 2102 ) 2103 2104 if self._match_text_seq("OFF"): 2105 prop.set("on", False) 2106 return prop 2107 2108 self._match(TokenType.ON) 2109 if self._match(TokenType.L_PAREN): 2110 while self._curr and not 
self._match(TokenType.R_PAREN): 2111 if self._match_text_seq("HISTORY_TABLE", "="): 2112 prop.set("this", self._parse_table_parts()) 2113 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2114 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2115 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2116 prop.set("retention_period", self._parse_retention_period()) 2117 2118 self._match(TokenType.COMMA) 2119 2120 return prop 2121 2122 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2123 self._match(TokenType.EQ) 2124 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2125 prop = self.expression(exp.DataDeletionProperty, on=on) 2126 2127 if self._match(TokenType.L_PAREN): 2128 while self._curr and not self._match(TokenType.R_PAREN): 2129 if self._match_text_seq("FILTER_COLUMN", "="): 2130 prop.set("filter_column", self._parse_column()) 2131 elif self._match_text_seq("RETENTION_PERIOD", "="): 2132 prop.set("retention_period", self._parse_retention_period()) 2133 2134 self._match(TokenType.COMMA) 2135 2136 return prop 2137 2138 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2139 kind = "HASH" 2140 expressions: t.Optional[t.List[exp.Expression]] = None 2141 if self._match_text_seq("BY", "HASH"): 2142 expressions = self._parse_wrapped_csv(self._parse_id_var) 2143 elif self._match_text_seq("BY", "RANDOM"): 2144 kind = "RANDOM" 2145 2146 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2147 buckets: t.Optional[exp.Expression] = None 2148 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2149 buckets = self._parse_number() 2150 2151 return self.expression( 2152 exp.DistributedByProperty, 2153 expressions=expressions, 2154 kind=kind, 2155 buckets=buckets, 2156 order=self._parse_order(), 2157 ) 2158 2159 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2160 self._match_text_seq("KEY") 2161 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2162 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2163 2164 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2165 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2166 prop = self._parse_system_versioning_property(with_=True) 2167 self._match_r_paren() 2168 return prop 2169 2170 if self._match(TokenType.L_PAREN, advance=False): 2171 return self._parse_wrapped_properties() 2172 2173 if self._match_text_seq("JOURNAL"): 2174 return self._parse_withjournaltable() 2175 2176 if self._match_texts(self.VIEW_ATTRIBUTES): 2177 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2178 2179 if self._match_text_seq("DATA"): 2180 return self._parse_withdata(no=False) 2181 elif self._match_text_seq("NO", "DATA"): 2182 return self._parse_withdata(no=True) 2183 2184 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2185 return self._parse_serde_properties(with_=True) 2186 2187 if self._match(TokenType.SCHEMA): 2188 return self.expression( 2189 exp.WithSchemaBindingProperty, 2190 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2191 ) 2192 2193 if not self._next: 2194 return None 2195 2196 return self._parse_withisolatedloading() 2197 2198 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2199 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2200 self._match(TokenType.EQ) 2201 2202 user = self._parse_id_var() 2203 self._match(TokenType.PARAMETER) 2204 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2205 2206 if not user or not host: 2207 return None 2208 2209 return exp.DefinerProperty(this=f"{user}@{host}") 2210 2211 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2212 self._match(TokenType.TABLE) 2213 self._match(TokenType.EQ) 2214 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2215 2216 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2217 return self.expression(exp.LogProperty, no=no) 2218 2219 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2220 return self.expression(exp.JournalProperty, **kwargs) 2221 2222 def _parse_checksum(self) -> exp.ChecksumProperty: 2223 self._match(TokenType.EQ) 2224 2225 on = None 2226 if self._match(TokenType.ON): 2227 on = True 2228 elif self._match_text_seq("OFF"): 2229 on = False 2230 2231 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2232 2233 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2234 return self.expression( 2235 exp.Cluster, 2236 expressions=( 2237 self._parse_wrapped_csv(self._parse_ordered) 2238 if wrapped 2239 else self._parse_csv(self._parse_ordered) 2240 ), 2241 ) 2242 2243 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2244 self._match_text_seq("BY") 2245 2246 self._match_l_paren() 2247 expressions = self._parse_csv(self._parse_column) 2248 self._match_r_paren() 2249 2250 if self._match_text_seq("SORTED", "BY"): 2251 self._match_l_paren() 2252 sorted_by = self._parse_csv(self._parse_ordered) 2253 self._match_r_paren() 2254 else: 2255 sorted_by = None 2256 2257 self._match(TokenType.INTO) 2258 buckets = self._parse_number() 2259 self._match_text_seq("BUCKETS") 2260 2261 return self.expression( 2262 exp.ClusteredByProperty, 2263 expressions=expressions, 2264 sorted_by=sorted_by, 2265 buckets=buckets, 2266 ) 2267 2268 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2269 if not self._match_text_seq("GRANTS"): 2270 self._retreat(self._index - 1) 2271 return None 2272 2273 return self.expression(exp.CopyGrantsProperty) 2274 2275 def _parse_freespace(self) -> exp.FreespaceProperty: 2276 self._match(TokenType.EQ) 2277 return self.expression( 2278 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2279 ) 2280 2281 def _parse_mergeblockratio( 2282 self, no: bool = False, default: bool = False 2283 ) -> exp.MergeBlockRatioProperty: 2284 if self._match(TokenType.EQ): 2285 return self.expression( 2286 exp.MergeBlockRatioProperty, 2287 this=self._parse_number(), 2288 percent=self._match(TokenType.PERCENT), 2289 ) 2290 2291 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2292 2293 def _parse_datablocksize( 2294 self, 2295 default: t.Optional[bool] = None, 2296 minimum: t.Optional[bool] = None, 2297 maximum: t.Optional[bool] = None, 2298 ) -> exp.DataBlocksizeProperty: 2299 self._match(TokenType.EQ) 2300 size = self._parse_number() 2301 2302 units = None 2303 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2304 units = self._prev.text 2305 2306 return self.expression( 2307 exp.DataBlocksizeProperty, 2308 size=size, 2309 units=units, 2310 default=default, 2311 minimum=minimum, 2312 maximum=maximum, 2313 ) 2314 2315 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2316 self._match(TokenType.EQ) 2317 always = self._match_text_seq("ALWAYS") 2318 manual = self._match_text_seq("MANUAL") 2319 never = 
self._match_text_seq("NEVER") 2320 default = self._match_text_seq("DEFAULT") 2321 2322 autotemp = None 2323 if self._match_text_seq("AUTOTEMP"): 2324 autotemp = self._parse_schema() 2325 2326 return self.expression( 2327 exp.BlockCompressionProperty, 2328 always=always, 2329 manual=manual, 2330 never=never, 2331 default=default, 2332 autotemp=autotemp, 2333 ) 2334 2335 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2336 index = self._index 2337 no = self._match_text_seq("NO") 2338 concurrent = self._match_text_seq("CONCURRENT") 2339 2340 if not self._match_text_seq("ISOLATED", "LOADING"): 2341 self._retreat(index) 2342 return None 2343 2344 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2345 return self.expression( 2346 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2347 ) 2348 2349 def _parse_locking(self) -> exp.LockingProperty: 2350 if self._match(TokenType.TABLE): 2351 kind = "TABLE" 2352 elif self._match(TokenType.VIEW): 2353 kind = "VIEW" 2354 elif self._match(TokenType.ROW): 2355 kind = "ROW" 2356 elif self._match_text_seq("DATABASE"): 2357 kind = "DATABASE" 2358 else: 2359 kind = None 2360 2361 if kind in ("DATABASE", "TABLE", "VIEW"): 2362 this = self._parse_table_parts() 2363 else: 2364 this = None 2365 2366 if self._match(TokenType.FOR): 2367 for_or_in = "FOR" 2368 elif self._match(TokenType.IN): 2369 for_or_in = "IN" 2370 else: 2371 for_or_in = None 2372 2373 if self._match_text_seq("ACCESS"): 2374 lock_type = "ACCESS" 2375 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2376 lock_type = "EXCLUSIVE" 2377 elif self._match_text_seq("SHARE"): 2378 lock_type = "SHARE" 2379 elif self._match_text_seq("READ"): 2380 lock_type = "READ" 2381 elif self._match_text_seq("WRITE"): 2382 lock_type = "WRITE" 2383 elif self._match_text_seq("CHECKSUM"): 2384 lock_type = "CHECKSUM" 2385 else: 2386 lock_type = None 2387 2388 override = self._match_text_seq("OVERRIDE") 2389 2390 return self.expression( 2391 exp.LockingProperty, 2392 this=this, 2393 kind=kind, 2394 for_or_in=for_or_in, 2395 lock_type=lock_type, 2396 override=override, 2397 ) 2398 2399 def _parse_partition_by(self) -> t.List[exp.Expression]: 2400 if self._match(TokenType.PARTITION_BY): 2401 return self._parse_csv(self._parse_assignment) 2402 return [] 2403 2404 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2405 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2406 if self._match_text_seq("MINVALUE"): 2407 return exp.var("MINVALUE") 2408 if self._match_text_seq("MAXVALUE"): 2409 return exp.var("MAXVALUE") 2410 return self._parse_bitwise() 2411 2412 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2413 expression = None 2414 from_expressions = None 2415 to_expressions = None 2416 2417 if self._match(TokenType.IN): 2418 this = self._parse_wrapped_csv(self._parse_bitwise) 2419 elif self._match(TokenType.FROM): 2420 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2421 self._match_text_seq("TO") 2422 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2423 elif self._match_text_seq("WITH", "(", "MODULUS"): 2424 this = self._parse_number() 2425 self._match_text_seq(",", "REMAINDER") 2426 expression = self._parse_number() 2427 self._match_r_paren() 2428 else: 2429 self.raise_error("Failed to parse partition bound spec.") 2430 2431 return self.expression( 2432 exp.PartitionBoundSpec, 2433 this=this, 2434 expression=expression, 2435 
from_expressions=from_expressions, 2436 to_expressions=to_expressions, 2437 ) 2438 2439 # https://www.postgresql.org/docs/current/sql-createtable.html 2440 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2441 if not self._match_text_seq("OF"): 2442 self._retreat(self._index - 1) 2443 return None 2444 2445 this = self._parse_table(schema=True) 2446 2447 if self._match(TokenType.DEFAULT): 2448 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2449 elif self._match_text_seq("FOR", "VALUES"): 2450 expression = self._parse_partition_bound_spec() 2451 else: 2452 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2453 2454 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2455 2456 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2457 self._match(TokenType.EQ) 2458 return self.expression( 2459 exp.PartitionedByProperty, 2460 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2461 ) 2462 2463 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2464 if self._match_text_seq("AND", "STATISTICS"): 2465 statistics = True 2466 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2467 statistics = False 2468 else: 2469 statistics = None 2470 2471 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2472 2473 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2474 if self._match_text_seq("SQL"): 2475 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2476 return None 2477 2478 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2479 if self._match_text_seq("SQL", "DATA"): 2480 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2481 return None 2482 2483 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2484 if self._match_text_seq("PRIMARY", "INDEX"): 2485 return exp.NoPrimaryIndexProperty() 2486 if self._match_text_seq("SQL"): 2487 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2488 return None 2489 2490 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2491 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2492 return exp.OnCommitProperty() 2493 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2494 return exp.OnCommitProperty(delete=True) 2495 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2496 2497 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2498 if self._match_text_seq("SQL", "DATA"): 2499 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2500 return None 2501 2502 def _parse_distkey(self) -> exp.DistKeyProperty: 2503 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2504 2505 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2506 table = self._parse_table(schema=True) 2507 2508 options = [] 2509 while self._match_texts(("INCLUDING", "EXCLUDING")): 2510 this = self._prev.text.upper() 2511 2512 id_var = self._parse_id_var() 2513 if not id_var: 2514 return None 2515 2516 options.append( 2517 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2518 ) 2519 2520 return self.expression(exp.LikeProperty, this=table, expressions=options) 2521 2522 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2523 return self.expression( 2524 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2525 ) 2526 2527 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2528 self._match(TokenType.EQ) 2529 return self.expression( 2530 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2531 ) 2532 2533 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2534 self._match_text_seq("WITH", "CONNECTION") 2535 return self.expression( 2536 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2537 ) 2538 2539 def _parse_returns(self) -> exp.ReturnsProperty: 2540 value: t.Optional[exp.Expression] 2541 null = None 2542 is_table = self._match(TokenType.TABLE) 2543 2544 if is_table: 2545 if self._match(TokenType.LT): 2546 value = self.expression( 2547 exp.Schema, 2548 this="TABLE", 2549 expressions=self._parse_csv(self._parse_struct_types), 2550 ) 2551 if not self._match(TokenType.GT): 2552 self.raise_error("Expecting >") 2553 else: 2554 value = self._parse_schema(exp.var("TABLE")) 2555 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2556 null = True 2557 value = None 2558 else: 2559 value = self._parse_types() 2560 2561 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2562 2563 def _parse_describe(self) -> exp.Describe: 2564 kind = self._match_set(self.CREATABLES) and self._prev.text 2565 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2566 if self._match(TokenType.DOT): 2567 style = None 2568 self._retreat(self._index - 2) 2569 this = self._parse_table(schema=True) 2570 properties = self._parse_properties() 2571 expressions = properties.expressions if properties else None 2572 partition = self._parse_partition() 2573 return self.expression( 2574 exp.Describe, 2575 this=this, 2576 style=style, 2577 kind=kind, 2578 expressions=expressions, 2579 partition=partition, 2580 ) 2581 2582 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2583 kind = self._prev.text.upper() 2584 expressions = [] 2585 2586 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2587 if self._match(TokenType.WHEN): 2588 expression = self._parse_disjunction() 2589 self._match(TokenType.THEN) 2590 else: 2591 expression = None 2592 2593 else_ = self._match(TokenType.ELSE) 2594 2595 if not self._match(TokenType.INTO): 2596 return None 2597 2598 return self.expression( 2599 exp.ConditionalInsert, 2600 this=self.expression( 2601 exp.Insert, 2602 this=self._parse_table(schema=True), 2603 expression=self._parse_derived_table_values(), 2604 ), 2605 expression=expression, 2606 else_=else_, 2607 ) 2608 2609 expression = parse_conditional_insert() 2610 while expression is not None: 2611 expressions.append(expression) 2612 expression = parse_conditional_insert() 2613 2614 return self.expression( 2615 exp.MultitableInserts, 2616 kind=kind, 2617 comments=comments, 2618 expressions=expressions, 2619 source=self._parse_table(), 2620 ) 2621 2622 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2623 comments = ensure_list(self._prev_comments) 2624 hint = self._parse_hint() 2625 overwrite = self._match(TokenType.OVERWRITE) 2626 ignore = self._match(TokenType.IGNORE) 2627 local = self._match_text_seq("LOCAL") 2628 alternative = None 2629 is_function = None 2630 2631 if self._match_text_seq("DIRECTORY"): 2632 this: t.Optional[exp.Expression] = self.expression( 2633 exp.Directory, 2634 this=self._parse_var_or_string(), 2635 local=local, 2636 row_format=self._parse_row_format(match_row=True), 2637 ) 
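# Editorial note (illustrative sketch, not in the original source): the branch above
# handles Hive/Spark-style INSERT OVERWRITE [LOCAL] DIRECTORY '/path' ... targets; the
# else branch below covers ordinary INSERT [INTO] statements that target a table or a
# table function.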
2638 else: 2639 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2640 comments += ensure_list(self._prev_comments) 2641 return self._parse_multitable_inserts(comments) 2642 2643 if self._match(TokenType.OR): 2644 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2645 2646 self._match(TokenType.INTO) 2647 comments += ensure_list(self._prev_comments) 2648 self._match(TokenType.TABLE) 2649 is_function = self._match(TokenType.FUNCTION) 2650 2651 this = ( 2652 self._parse_table(schema=True, parse_partition=True) 2653 if not is_function 2654 else self._parse_function() 2655 ) 2656 2657 returning = self._parse_returning() 2658 2659 return self.expression( 2660 exp.Insert, 2661 comments=comments, 2662 hint=hint, 2663 is_function=is_function, 2664 this=this, 2665 stored=self._match_text_seq("STORED") and self._parse_stored(), 2666 by_name=self._match_text_seq("BY", "NAME"), 2667 exists=self._parse_exists(), 2668 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2669 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2670 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2671 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2672 conflict=self._parse_on_conflict(), 2673 returning=returning or self._parse_returning(), 2674 overwrite=overwrite, 2675 alternative=alternative, 2676 ignore=ignore, 2677 source=self._match(TokenType.TABLE) and self._parse_table(), 2678 ) 2679 2680 def _parse_kill(self) -> exp.Kill: 2681 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2682 2683 return self.expression( 2684 exp.Kill, 2685 this=self._parse_primary(), 2686 kind=kind, 2687 ) 2688 2689 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2690 conflict = self._match_text_seq("ON", "CONFLICT") 2691 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2692 2693 if not conflict and not duplicate: 2694 return None 2695 2696 conflict_keys = None 2697 constraint = None 2698 2699 if conflict: 2700 if self._match_text_seq("ON", "CONSTRAINT"): 2701 constraint = self._parse_id_var() 2702 elif self._match(TokenType.L_PAREN): 2703 conflict_keys = self._parse_csv(self._parse_id_var) 2704 self._match_r_paren() 2705 2706 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2707 if self._prev.token_type == TokenType.UPDATE: 2708 self._match(TokenType.SET) 2709 expressions = self._parse_csv(self._parse_equality) 2710 else: 2711 expressions = None 2712 2713 return self.expression( 2714 exp.OnConflict, 2715 duplicate=duplicate, 2716 expressions=expressions, 2717 action=action, 2718 conflict_keys=conflict_keys, 2719 constraint=constraint, 2720 ) 2721 2722 def _parse_returning(self) -> t.Optional[exp.Returning]: 2723 if not self._match(TokenType.RETURNING): 2724 return None 2725 return self.expression( 2726 exp.Returning, 2727 expressions=self._parse_csv(self._parse_expression), 2728 into=self._match(TokenType.INTO) and self._parse_table_part(), 2729 ) 2730 2731 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2732 if not self._match(TokenType.FORMAT): 2733 return None 2734 return self._parse_row_format() 2735 2736 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2737 index = self._index 2738 with_ = with_ or self._match_text_seq("WITH") 2739 2740 if not self._match(TokenType.SERDE_PROPERTIES): 2741 self._retreat(index) 2742 return 
None 2743 return self.expression( 2744 exp.SerdeProperties, 2745 **{ # type: ignore 2746 "expressions": self._parse_wrapped_properties(), 2747 "with": with_, 2748 }, 2749 ) 2750 2751 def _parse_row_format( 2752 self, match_row: bool = False 2753 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2754 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2755 return None 2756 2757 if self._match_text_seq("SERDE"): 2758 this = self._parse_string() 2759 2760 serde_properties = self._parse_serde_properties() 2761 2762 return self.expression( 2763 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2764 ) 2765 2766 self._match_text_seq("DELIMITED") 2767 2768 kwargs = {} 2769 2770 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2771 kwargs["fields"] = self._parse_string() 2772 if self._match_text_seq("ESCAPED", "BY"): 2773 kwargs["escaped"] = self._parse_string() 2774 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2775 kwargs["collection_items"] = self._parse_string() 2776 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2777 kwargs["map_keys"] = self._parse_string() 2778 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2779 kwargs["lines"] = self._parse_string() 2780 if self._match_text_seq("NULL", "DEFINED", "AS"): 2781 kwargs["null"] = self._parse_string() 2782 2783 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2784 2785 def _parse_load(self) -> exp.LoadData | exp.Command: 2786 if self._match_text_seq("DATA"): 2787 local = self._match_text_seq("LOCAL") 2788 self._match_text_seq("INPATH") 2789 inpath = self._parse_string() 2790 overwrite = self._match(TokenType.OVERWRITE) 2791 self._match_pair(TokenType.INTO, TokenType.TABLE) 2792 2793 return self.expression( 2794 exp.LoadData, 2795 this=self._parse_table(schema=True), 2796 local=local, 2797 overwrite=overwrite, 2798 inpath=inpath, 2799 partition=self._parse_partition(), 2800 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2801 serde=self._match_text_seq("SERDE") and self._parse_string(), 2802 ) 2803 return self._parse_as_command(self._prev) 2804 2805 def _parse_delete(self) -> exp.Delete: 2806 # This handles MySQL's "Multiple-Table Syntax" 2807 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2808 tables = None 2809 comments = self._prev_comments 2810 if not self._match(TokenType.FROM, advance=False): 2811 tables = self._parse_csv(self._parse_table) or None 2812 2813 returning = self._parse_returning() 2814 2815 return self.expression( 2816 exp.Delete, 2817 comments=comments, 2818 tables=tables, 2819 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2820 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2821 where=self._parse_where(), 2822 returning=returning or self._parse_returning(), 2823 limit=self._parse_limit(), 2824 ) 2825 2826 def _parse_update(self) -> exp.Update: 2827 comments = self._prev_comments 2828 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2829 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2830 returning = self._parse_returning() 2831 return self.expression( 2832 exp.Update, 2833 comments=comments, 2834 **{ # type: ignore 2835 "this": this, 2836 "expressions": expressions, 2837 "from": self._parse_from(joins=True), 2838 "where": self._parse_where(), 2839 "returning": returning or self._parse_returning(), 2840 "order": self._parse_order(), 2841 
"limit": self._parse_limit(), 2842 }, 2843 ) 2844 2845 def _parse_uncache(self) -> exp.Uncache: 2846 if not self._match(TokenType.TABLE): 2847 self.raise_error("Expecting TABLE after UNCACHE") 2848 2849 return self.expression( 2850 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2851 ) 2852 2853 def _parse_cache(self) -> exp.Cache: 2854 lazy = self._match_text_seq("LAZY") 2855 self._match(TokenType.TABLE) 2856 table = self._parse_table(schema=True) 2857 2858 options = [] 2859 if self._match_text_seq("OPTIONS"): 2860 self._match_l_paren() 2861 k = self._parse_string() 2862 self._match(TokenType.EQ) 2863 v = self._parse_string() 2864 options = [k, v] 2865 self._match_r_paren() 2866 2867 self._match(TokenType.ALIAS) 2868 return self.expression( 2869 exp.Cache, 2870 this=table, 2871 lazy=lazy, 2872 options=options, 2873 expression=self._parse_select(nested=True), 2874 ) 2875 2876 def _parse_partition(self) -> t.Optional[exp.Partition]: 2877 if not self._match(TokenType.PARTITION): 2878 return None 2879 2880 return self.expression( 2881 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2882 ) 2883 2884 def _parse_value(self) -> t.Optional[exp.Tuple]: 2885 if self._match(TokenType.L_PAREN): 2886 expressions = self._parse_csv(self._parse_expression) 2887 self._match_r_paren() 2888 return self.expression(exp.Tuple, expressions=expressions) 2889 2890 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2891 expression = self._parse_expression() 2892 if expression: 2893 return self.expression(exp.Tuple, expressions=[expression]) 2894 return None 2895 2896 def _parse_projections(self) -> t.List[exp.Expression]: 2897 return self._parse_expressions() 2898 2899 def _parse_select( 2900 self, 2901 nested: bool = False, 2902 table: bool = False, 2903 parse_subquery_alias: bool = True, 2904 parse_set_operation: bool = True, 2905 ) -> t.Optional[exp.Expression]: 2906 cte = self._parse_with() 2907 2908 if cte: 2909 this = self._parse_statement() 2910 2911 if not this: 2912 self.raise_error("Failed to parse any statement following CTE") 2913 return cte 2914 2915 if "with" in this.arg_types: 2916 this.set("with", cte) 2917 else: 2918 self.raise_error(f"{this.key} does not support CTE") 2919 this = cte 2920 2921 return this 2922 2923 # duckdb supports leading with FROM x 2924 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2925 2926 if self._match(TokenType.SELECT): 2927 comments = self._prev_comments 2928 2929 hint = self._parse_hint() 2930 2931 if self._next and not self._next.token_type == TokenType.DOT: 2932 all_ = self._match(TokenType.ALL) 2933 distinct = self._match_set(self.DISTINCT_TOKENS) 2934 else: 2935 all_, distinct = None, None 2936 2937 kind = ( 2938 self._match(TokenType.ALIAS) 2939 and self._match_texts(("STRUCT", "VALUE")) 2940 and self._prev.text.upper() 2941 ) 2942 2943 if distinct: 2944 distinct = self.expression( 2945 exp.Distinct, 2946 on=self._parse_value() if self._match(TokenType.ON) else None, 2947 ) 2948 2949 if all_ and distinct: 2950 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2951 2952 limit = self._parse_limit(top=True) 2953 projections = self._parse_projections() 2954 2955 this = self.expression( 2956 exp.Select, 2957 kind=kind, 2958 hint=hint, 2959 distinct=distinct, 2960 expressions=projections, 2961 limit=limit, 2962 ) 2963 this.comments = comments 2964 2965 into = self._parse_into() 2966 if into: 2967 this.set("into", into) 2968 2969 if 
not from_: 2970 from_ = self._parse_from() 2971 2972 if from_: 2973 this.set("from", from_) 2974 2975 this = self._parse_query_modifiers(this) 2976 elif (table or nested) and self._match(TokenType.L_PAREN): 2977 if self._match(TokenType.PIVOT): 2978 this = self._parse_simplified_pivot() 2979 elif self._match(TokenType.FROM): 2980 this = exp.select("*").from_( 2981 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2982 ) 2983 else: 2984 this = ( 2985 self._parse_table() 2986 if table 2987 else self._parse_select(nested=True, parse_set_operation=False) 2988 ) 2989 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2990 2991 self._match_r_paren() 2992 2993 # We return early here so that the UNION isn't attached to the subquery by the 2994 # following call to _parse_set_operations, but instead becomes the parent node 2995 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2996 elif self._match(TokenType.VALUES, advance=False): 2997 this = self._parse_derived_table_values() 2998 elif from_: 2999 this = exp.select("*").from_(from_.this, copy=False) 3000 elif self._match(TokenType.SUMMARIZE): 3001 table = self._match(TokenType.TABLE) 3002 this = self._parse_select() or self._parse_string() or self._parse_table() 3003 return self.expression(exp.Summarize, this=this, table=table) 3004 elif self._match(TokenType.DESCRIBE): 3005 this = self._parse_describe() 3006 elif self._match_text_seq("STREAM"): 3007 this = self.expression(exp.Stream, this=self._parse_function()) 3008 else: 3009 this = None 3010 3011 return self._parse_set_operations(this) if parse_set_operation else this 3012 3013 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3014 if not skip_with_token and not self._match(TokenType.WITH): 3015 return None 3016 3017 comments = self._prev_comments 3018 recursive = self._match(TokenType.RECURSIVE) 3019 3020 expressions = [] 3021 while True: 3022 expressions.append(self._parse_cte()) 3023 3024 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3025 break 3026 else: 3027 self._match(TokenType.WITH) 3028 3029 return self.expression( 3030 exp.With, comments=comments, expressions=expressions, recursive=recursive 3031 ) 3032 3033 def _parse_cte(self) -> exp.CTE: 3034 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3035 if not alias or not alias.this: 3036 self.raise_error("Expected CTE to have alias") 3037 3038 self._match(TokenType.ALIAS) 3039 comments = self._prev_comments 3040 3041 if self._match_text_seq("NOT", "MATERIALIZED"): 3042 materialized = False 3043 elif self._match_text_seq("MATERIALIZED"): 3044 materialized = True 3045 else: 3046 materialized = None 3047 3048 return self.expression( 3049 exp.CTE, 3050 this=self._parse_wrapped(self._parse_statement), 3051 alias=alias, 3052 materialized=materialized, 3053 comments=comments, 3054 ) 3055 3056 def _parse_table_alias( 3057 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3058 ) -> t.Optional[exp.TableAlias]: 3059 any_token = self._match(TokenType.ALIAS) 3060 alias = ( 3061 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3062 or self._parse_string_as_identifier() 3063 ) 3064 3065 index = self._index 3066 if self._match(TokenType.L_PAREN): 3067 columns = self._parse_csv(self._parse_function_parameter) 3068 self._match_r_paren() if columns else self._retreat(index) 3069 else: 3070 columns = None 3071 3072 if not alias and not columns: 3073 return None 3074 3075 table_alias = 

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None
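
    # Illustrative: _parse_hint relies on the tokenizer emitting a HINT token for a
    # leading "/*+ ... */" block; hint support varies by dialect, e.g.
    #
    #     sqlglot.parse_one("SELECT /*+ BROADCAST(y) */ x FROM y", read="spark")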

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
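
    # Illustrative: a minimal MATCH_RECOGNIZE query touching the clauses handled above
    # (PARTITION BY, ORDER BY, MEASURES, row/skip modifiers, PATTERN and DEFINE):
    #
    #     SELECT * FROM t MATCH_RECOGNIZE (
    #       PARTITION BY a ORDER BY b
    #       MEASURES FINAL COUNT(*) AS cnt
    #       ONE ROW PER MATCH
    #       AFTER MATCH SKIP PAST LAST ROW
    #       PATTERN (A B+)
    #       DEFINE A AS x > 0, B AS x <= 0
    #     )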

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
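
    # Illustrative: _parse_lateral unifies LATERAL with T-SQL style APPLY; both of these
    # shapes produce an exp.Lateral (cross_apply is True/False for the APPLY forms):
    #
    #     SELECT * FROM t CROSS APPLY f(t.x)                 -- e.g. read="tsql"
    #     SELECT * FROM t LATERAL VIEW EXPLODE(t.xs) u AS x  -- e.g. read="hive"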

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type == TokenType.CROSS)
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
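
    # Illustrative: the method/side/kind tokens and the ON/USING clause all land in the
    # exp.Join's args, e.g.
    #
    #     sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #     sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)")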

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
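
    # Illustrative: dotted references fill the this/db/catalog args, and even deeper
    # paths nest into exp.Dot, e.g.
    #
    #     sqlglot.parse_one("SELECT * FROM my_catalog.my_db.tbl")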

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data
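
    # Illustrative (Snowflake-style time travel; exact syntax varies by dialect):
    #
    #     SELECT * FROM t AT (TIMESTAMP => '2024-01-01 00:00:00'::TIMESTAMP)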
"=>"): 3750 return None 3751 3752 information = self._parse_var(any_token=True) 3753 self._match_r_paren() 3754 3755 return self.expression( 3756 exp.Changes, 3757 information=information, 3758 at_before=self._parse_historical_data(), 3759 end=self._parse_historical_data(), 3760 ) 3761 3762 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3763 if not self._match(TokenType.UNNEST): 3764 return None 3765 3766 expressions = self._parse_wrapped_csv(self._parse_equality) 3767 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3768 3769 alias = self._parse_table_alias() if with_alias else None 3770 3771 if alias: 3772 if self.dialect.UNNEST_COLUMN_ONLY: 3773 if alias.args.get("columns"): 3774 self.raise_error("Unexpected extra column alias in unnest.") 3775 3776 alias.set("columns", [alias.this]) 3777 alias.set("this", None) 3778 3779 columns = alias.args.get("columns") or [] 3780 if offset and len(expressions) < len(columns): 3781 offset = columns.pop() 3782 3783 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3784 self._match(TokenType.ALIAS) 3785 offset = self._parse_id_var( 3786 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3787 ) or exp.to_identifier("offset") 3788 3789 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3790 3791 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3792 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3793 if not is_derived and not ( 3794 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3795 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3796 ): 3797 return None 3798 3799 expressions = self._parse_csv(self._parse_value) 3800 alias = self._parse_table_alias() 3801 3802 if is_derived: 3803 self._match_r_paren() 3804 3805 return self.expression( 3806 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3807 ) 3808 3809 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3810 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3811 as_modifier and self._match_text_seq("USING", "SAMPLE") 3812 ): 3813 return None 3814 3815 bucket_numerator = None 3816 bucket_denominator = None 3817 bucket_field = None 3818 percent = None 3819 size = None 3820 seed = None 3821 3822 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3823 matched_l_paren = self._match(TokenType.L_PAREN) 3824 3825 if self.TABLESAMPLE_CSV: 3826 num = None 3827 expressions = self._parse_csv(self._parse_primary) 3828 else: 3829 expressions = None 3830 num = ( 3831 self._parse_factor() 3832 if self._match(TokenType.NUMBER, advance=False) 3833 else self._parse_primary() or self._parse_placeholder() 3834 ) 3835 3836 if self._match_text_seq("BUCKET"): 3837 bucket_numerator = self._parse_number() 3838 self._match_text_seq("OUT", "OF") 3839 bucket_denominator = bucket_denominator = self._parse_number() 3840 self._match(TokenType.ON) 3841 bucket_field = self._parse_field() 3842 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3843 percent = num 3844 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3845 size = num 3846 else: 3847 percent = num 3848 3849 if matched_l_paren: 3850 self._match_r_paren() 3851 3852 if self._match(TokenType.L_PAREN): 3853 method = self._parse_var(upper=True) 3854 seed = self._match(TokenType.COMMA) and self._parse_number() 3855 self._match_r_paren() 3856 elif self._match_texts(("SEED", 
"REPEATABLE")): 3857 seed = self._parse_wrapped(self._parse_number) 3858 3859 if not method and self.DEFAULT_SAMPLING_METHOD: 3860 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3861 3862 return self.expression( 3863 exp.TableSample, 3864 expressions=expressions, 3865 method=method, 3866 bucket_numerator=bucket_numerator, 3867 bucket_denominator=bucket_denominator, 3868 bucket_field=bucket_field, 3869 percent=percent, 3870 size=size, 3871 seed=seed, 3872 ) 3873 3874 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3875 return list(iter(self._parse_pivot, None)) or None 3876 3877 def _parse_joins(self) -> t.Iterator[exp.Join]: 3878 return iter(self._parse_join, None) 3879 3880 # https://duckdb.org/docs/sql/statements/pivot 3881 def _parse_simplified_pivot(self) -> exp.Pivot: 3882 def _parse_on() -> t.Optional[exp.Expression]: 3883 this = self._parse_bitwise() 3884 return self._parse_in(this) if self._match(TokenType.IN) else this 3885 3886 this = self._parse_table() 3887 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3888 using = self._match(TokenType.USING) and self._parse_csv( 3889 lambda: self._parse_alias(self._parse_function()) 3890 ) 3891 group = self._parse_group() 3892 return self.expression( 3893 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3894 ) 3895 3896 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3897 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3898 this = self._parse_select_or_expression() 3899 3900 self._match(TokenType.ALIAS) 3901 alias = self._parse_bitwise() 3902 if alias: 3903 if isinstance(alias, exp.Column) and not alias.db: 3904 alias = alias.this 3905 return self.expression(exp.PivotAlias, this=this, alias=alias) 3906 3907 return this 3908 3909 value = self._parse_column() 3910 3911 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3912 self.raise_error("Expecting IN (") 3913 3914 if self._match(TokenType.ANY): 3915 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3916 else: 3917 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3918 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3919 3920 self._match_r_paren() 3921 return expr 3922 3923 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3924 index = self._index 3925 include_nulls = None 3926 3927 if self._match(TokenType.PIVOT): 3928 unpivot = False 3929 elif self._match(TokenType.UNPIVOT): 3930 unpivot = True 3931 3932 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3933 if self._match_text_seq("INCLUDE", "NULLS"): 3934 include_nulls = True 3935 elif self._match_text_seq("EXCLUDE", "NULLS"): 3936 include_nulls = False 3937 else: 3938 return None 3939 3940 expressions = [] 3941 3942 if not self._match(TokenType.L_PAREN): 3943 self._retreat(index) 3944 return None 3945 3946 if unpivot: 3947 expressions = self._parse_csv(self._parse_column) 3948 else: 3949 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3950 3951 if not expressions: 3952 self.raise_error("Failed to parse PIVOT's aggregation list") 3953 3954 if not self._match(TokenType.FOR): 3955 self.raise_error("Expecting FOR") 3956 3957 field = self._parse_pivot_in() 3958 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3959 self._parse_bitwise 3960 ) 3961 3962 self._match_r_paren() 3963 3964 pivot = self.expression( 3965 exp.Pivot, 3966 expressions=expressions, 3967 

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )
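
    # Illustrative: GROUP BY variants collected into the exp.Group args above:
    #
    #     SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)
    #     SELECT a, b FROM t GROUP BY ALL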

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
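
    # Illustrative: ORDER BY and the Oracle-style ORDER SIBLINGS BY both produce an
    # exp.Order (siblings=True for the latter), e.g.
    #
    #     SELECT * FROM t ORDER BY a DESC NULLS LAST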
"from": self._match(TokenType.FROM) and self._parse_bitwise(), 4177 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4178 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4179 "interpolate": self._parse_interpolate(), 4180 }, 4181 ) 4182 else: 4183 with_fill = None 4184 4185 return self.expression( 4186 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4187 ) 4188 4189 def _parse_limit( 4190 self, 4191 this: t.Optional[exp.Expression] = None, 4192 top: bool = False, 4193 skip_limit_token: bool = False, 4194 ) -> t.Optional[exp.Expression]: 4195 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4196 comments = self._prev_comments 4197 if top: 4198 limit_paren = self._match(TokenType.L_PAREN) 4199 expression = self._parse_term() if limit_paren else self._parse_number() 4200 4201 if limit_paren: 4202 self._match_r_paren() 4203 else: 4204 expression = self._parse_term() 4205 4206 if self._match(TokenType.COMMA): 4207 offset = expression 4208 expression = self._parse_term() 4209 else: 4210 offset = None 4211 4212 limit_exp = self.expression( 4213 exp.Limit, 4214 this=this, 4215 expression=expression, 4216 offset=offset, 4217 comments=comments, 4218 expressions=self._parse_limit_by(), 4219 ) 4220 4221 return limit_exp 4222 4223 if self._match(TokenType.FETCH): 4224 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4225 direction = self._prev.text.upper() if direction else "FIRST" 4226 4227 count = self._parse_field(tokens=self.FETCH_TOKENS) 4228 percent = self._match(TokenType.PERCENT) 4229 4230 self._match_set((TokenType.ROW, TokenType.ROWS)) 4231 4232 only = self._match_text_seq("ONLY") 4233 with_ties = self._match_text_seq("WITH", "TIES") 4234 4235 if only and with_ties: 4236 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4237 4238 return self.expression( 4239 exp.Fetch, 4240 direction=direction, 4241 count=count, 4242 percent=percent, 4243 with_ties=with_ties, 4244 ) 4245 4246 return this 4247 4248 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4249 if not self._match(TokenType.OFFSET): 4250 return this 4251 4252 count = self._parse_term() 4253 self._match_set((TokenType.ROW, TokenType.ROWS)) 4254 4255 return self.expression( 4256 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4257 ) 4258 4259 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4260 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4261 4262 def _parse_locks(self) -> t.List[exp.Lock]: 4263 locks = [] 4264 while True: 4265 if self._match_text_seq("FOR", "UPDATE"): 4266 update = True 4267 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4268 "LOCK", "IN", "SHARE", "MODE" 4269 ): 4270 update = False 4271 else: 4272 break 4273 4274 expressions = None 4275 if self._match_text_seq("OF"): 4276 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4277 4278 wait: t.Optional[bool | exp.Expression] = None 4279 if self._match_text_seq("NOWAIT"): 4280 wait = True 4281 elif self._match_text_seq("WAIT"): 4282 wait = self._parse_primary() 4283 elif self._match_text_seq("SKIP", "LOCKED"): 4284 wait = False 4285 4286 locks.append( 4287 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4288 ) 4289 4290 return locks 4291 4292 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4293 while this and 

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
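
    # Illustrative: set operations fold left-to-right into exp.Union / exp.Except /
    # exp.Intersect, with DISTINCT vs. ALL defaulting per dialect, e.g.
    #
    #     sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")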

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
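
    # Illustrative: predicates handled above and the AST nodes they produce:
    #
    #     a IS NOT DISTINCT FROM b   -- exp.NullSafeEQ
    #     x IN (1, 2, 3)             -- exp.In
    #     x BETWEEN 1 AND 10         -- exp.Between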

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this
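
    # Illustrative: intervals are normalized toward INTERVAL '<value>' <UNIT>, so these
    # parse to equivalent trees (the second splits the unit out of the string literal):
    #
    #     INTERVAL '5' DAY
    #     INTERVAL '5 day'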

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
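
    # Illustrative: division gets dialect-dependent semantics attached; the "typed" and
    # "safe" args set above drive how exp.Div transpiles between dialects, e.g.
    #
    #     sqlglot.parse_one("SELECT a / b FROM t")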

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
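
    # Illustrative: a parenthesized parameter list is parsed into the DataType's
    # expressions via _parse_type_size, e.g.
    #
    #     sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0)) FROM t")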

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index
supports the INT ARRAY[3] syntax as a synonym for INT[3] 4865 matched_array = self._match(TokenType.ARRAY) 4866 4867 while self._curr: 4868 datatype_token = self._prev.token_type 4869 matched_l_bracket = self._match(TokenType.L_BRACKET) 4870 if not matched_l_bracket and not matched_array: 4871 break 4872 4873 matched_array = False 4874 values = self._parse_csv(self._parse_assignment) or None 4875 if ( 4876 values 4877 and not schema 4878 and ( 4879 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4880 ) 4881 ): 4882 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4883 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4884 self._retreat(index) 4885 break 4886 4887 this = exp.DataType( 4888 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4889 ) 4890 self._match(TokenType.R_BRACKET) 4891 4892 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4893 converter = self.TYPE_CONVERTERS.get(this.this) 4894 if converter: 4895 this = converter(t.cast(exp.DataType, this)) 4896 4897 return this 4898 4899 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4900 index = self._index 4901 4902 if ( 4903 self._curr 4904 and self._next 4905 and self._curr.token_type in self.TYPE_TOKENS 4906 and self._next.token_type in self.TYPE_TOKENS 4907 ): 4908 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4909 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4910 this = self._parse_id_var() 4911 else: 4912 this = ( 4913 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4914 or self._parse_id_var() 4915 ) 4916 4917 self._match(TokenType.COLON) 4918 4919 if ( 4920 type_required 4921 and not isinstance(this, exp.DataType) 4922 and not self._match_set(self.TYPE_TOKENS, advance=False) 4923 ): 4924 self._retreat(index) 4925 return self._parse_types() 4926 4927 return self._parse_column_def(this) 4928 4929 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4930 if not self._match_text_seq("AT", "TIME", "ZONE"): 4931 return this 4932 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4933 4934 def _parse_column(self) -> t.Optional[exp.Expression]: 4935 this = self._parse_column_reference() 4936 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4937 4938 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4939 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4940 4941 return column 4942 4943 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4944 this = self._parse_field() 4945 if ( 4946 not this 4947 and self._match(TokenType.VALUES, advance=False) 4948 and self.VALUES_FOLLOWED_BY_PAREN 4949 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4950 ): 4951 this = self._parse_id_var() 4952 4953 if isinstance(this, exp.Identifier): 4954 # We bubble up comments from the Identifier to the Column 4955 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4956 4957 return this 4958 4959 def _parse_colon_as_variant_extract( 4960 self, this: t.Optional[exp.Expression] 4961 ) -> t.Optional[exp.Expression]: 4962 casts = [] 4963 json_path = [] 4964 4965 while self._match(TokenType.COLON): 4966 start_index = self._index 4967 
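# e.g. (illustrative) for Snowflake's col:a.b::INT, each pass through this loop consumes one `:` segment;
# the segments collected in json_path are later merged into a single JSONExtract, and the casts peeled off
# below are re-applied on top, yielding roughly CAST(JSONExtract(col, 'a.b') AS INT)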
4968 # Snowflake allows reserved keywords as json keys; they are matched via `tokens` here, because _advance_any() (used when any_token=True) refuses reserved tokens such as TokenType.SELECT 4969 path = self._parse_column_ops( 4970 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4971 ) 4972 4973 # The cast :: operator has a lower precedence than the extraction operator :, so 4974 # we rearrange the AST appropriately to avoid casting the JSON path 4975 while isinstance(path, exp.Cast): 4976 casts.append(path.to) 4977 path = path.this 4978 4979 if casts: 4980 dcolon_offset = next( 4981 i 4982 for i, t in enumerate(self._tokens[start_index:]) 4983 if t.token_type == TokenType.DCOLON 4984 ) 4985 end_token = self._tokens[start_index + dcolon_offset - 1] 4986 else: 4987 end_token = self._prev 4988 4989 if path: 4990 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4991 4992 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4993 # Databricks transforms it back to the colon/dot notation 4994 if json_path: 4995 this = self.expression( 4996 exp.JSONExtract, 4997 this=this, 4998 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4999 variant_extract=True, 5000 ) 5001 5002 while casts: 5003 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5004 5005 return this 5006 5007 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5008 return self._parse_types() 5009 5010 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5011 this = self._parse_bracket(this) 5012 5013 while self._match_set(self.COLUMN_OPERATORS): 5014 op_token = self._prev.token_type 5015 op = self.COLUMN_OPERATORS.get(op_token) 5016 5017 if op_token == TokenType.DCOLON: 5018 field = self._parse_dcolon() 5019 if not field: 5020 self.raise_error("Expected type") 5021 elif op and self._curr: 5022 field = self._parse_column_reference() 5023 else: 5024 field = self._parse_field(any_token=True, anonymous_func=True) 5025 5026 if isinstance(field, exp.Func) and this: 5027 # bigquery allows function calls like x.y.count(...) 5028 # SAFE.SUBSTR(...)
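# e.g. (illustrative) in x.y.count(1), the replace_tree call below rebuilds the column x.y into a Dot
# chain, so the final tree is roughly Dot(Dot(x, y), Count(1)), which renders back as x.y.count(1)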
5029 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5030 this = exp.replace_tree( 5031 this, 5032 lambda n: ( 5033 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5034 if n.table 5035 else n.this 5036 ) 5037 if isinstance(n, exp.Column) 5038 else n, 5039 ) 5040 5041 if op: 5042 this = op(self, this, field) 5043 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5044 this = self.expression( 5045 exp.Column, 5046 this=field, 5047 table=this.this, 5048 db=this.args.get("table"), 5049 catalog=this.args.get("db"), 5050 ) 5051 else: 5052 this = self.expression(exp.Dot, this=this, expression=field) 5053 5054 this = self._parse_bracket(this) 5055 5056 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5057 5058 def _parse_primary(self) -> t.Optional[exp.Expression]: 5059 if self._match_set(self.PRIMARY_PARSERS): 5060 token_type = self._prev.token_type 5061 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5062 5063 if token_type == TokenType.STRING: 5064 expressions = [primary] 5065 while self._match(TokenType.STRING): 5066 expressions.append(exp.Literal.string(self._prev.text)) 5067 5068 if len(expressions) > 1: 5069 return self.expression(exp.Concat, expressions=expressions) 5070 5071 return primary 5072 5073 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5074 return exp.Literal.number(f"0.{self._prev.text}") 5075 5076 if self._match(TokenType.L_PAREN): 5077 comments = self._prev_comments 5078 query = self._parse_select() 5079 5080 if query: 5081 expressions = [query] 5082 else: 5083 expressions = self._parse_expressions() 5084 5085 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5086 5087 if not this and self._match(TokenType.R_PAREN, advance=False): 5088 this = self.expression(exp.Tuple) 5089 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5090 this = self._parse_subquery(this=this, parse_alias=False) 5091 elif isinstance(this, exp.Subquery): 5092 this = self._parse_subquery( 5093 this=self._parse_set_operations(this), parse_alias=False 5094 ) 5095 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5096 this = self.expression(exp.Tuple, expressions=expressions) 5097 else: 5098 this = self.expression(exp.Paren, this=this) 5099 5100 if this: 5101 this.add_comments(comments) 5102 5103 self._match_r_paren(expression=this) 5104 return this 5105 5106 return None 5107 5108 def _parse_field( 5109 self, 5110 any_token: bool = False, 5111 tokens: t.Optional[t.Collection[TokenType]] = None, 5112 anonymous_func: bool = False, 5113 ) -> t.Optional[exp.Expression]: 5114 if anonymous_func: 5115 field = ( 5116 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5117 or self._parse_primary() 5118 ) 5119 else: 5120 field = self._parse_primary() or self._parse_function( 5121 anonymous=anonymous_func, any_token=any_token 5122 ) 5123 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5124 5125 def _parse_function( 5126 self, 5127 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5128 anonymous: bool = False, 5129 optional_parens: bool = True, 5130 any_token: bool = False, 5131 ) -> t.Optional[exp.Expression]: 5132 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5133 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5134 fn_syntax = False 5135 if ( 5136 self._match(TokenType.L_BRACE, advance=False) 5137 and self._next 5138 and 
self._next.text.upper() == "FN" 5139 ): 5140 self._advance(2) 5141 fn_syntax = True 5142 5143 func = self._parse_function_call( 5144 functions=functions, 5145 anonymous=anonymous, 5146 optional_parens=optional_parens, 5147 any_token=any_token, 5148 ) 5149 5150 if fn_syntax: 5151 self._match(TokenType.R_BRACE) 5152 5153 return func 5154 5155 def _parse_function_call( 5156 self, 5157 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5158 anonymous: bool = False, 5159 optional_parens: bool = True, 5160 any_token: bool = False, 5161 ) -> t.Optional[exp.Expression]: 5162 if not self._curr: 5163 return None 5164 5165 comments = self._curr.comments 5166 token_type = self._curr.token_type 5167 this = self._curr.text 5168 upper = this.upper() 5169 5170 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5171 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5172 self._advance() 5173 return self._parse_window(parser(self)) 5174 5175 if not self._next or self._next.token_type != TokenType.L_PAREN: 5176 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5177 self._advance() 5178 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5179 5180 return None 5181 5182 if any_token: 5183 if token_type in self.RESERVED_TOKENS: 5184 return None 5185 elif token_type not in self.FUNC_TOKENS: 5186 return None 5187 5188 self._advance(2) 5189 5190 parser = self.FUNCTION_PARSERS.get(upper) 5191 if parser and not anonymous: 5192 this = parser(self) 5193 else: 5194 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5195 5196 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5197 this = self.expression(subquery_predicate, this=self._parse_select()) 5198 self._match_r_paren() 5199 return this 5200 5201 if functions is None: 5202 functions = self.FUNCTIONS 5203 5204 function = functions.get(upper) 5205 5206 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5207 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5208 5209 if alias: 5210 args = self._kv_to_prop_eq(args) 5211 5212 if function and not anonymous: 5213 if "dialect" in function.__code__.co_varnames: 5214 func = function(args, dialect=self.dialect) 5215 else: 5216 func = function(args) 5217 5218 func = self.validate_expression(func, args) 5219 if not self.dialect.NORMALIZE_FUNCTIONS: 5220 func.meta["name"] = this 5221 5222 this = func 5223 else: 5224 if token_type == TokenType.IDENTIFIER: 5225 this = exp.Identifier(this=this, quoted=True) 5226 this = self.expression(exp.Anonymous, this=this, expressions=args) 5227 5228 if isinstance(this, exp.Expression): 5229 this.add_comments(comments) 5230 5231 self._match_r_paren(this) 5232 return self._parse_window(this) 5233 5234 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5235 return expression 5236 5237 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5238 transformed = [] 5239 5240 for index, e in enumerate(expressions): 5241 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5242 if isinstance(e, exp.Alias): 5243 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5244 5245 if not isinstance(e, exp.PropertyEQ): 5246 e = self.expression( 5247 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5248 ) 5249 5250 if isinstance(e.this, exp.Column): 5251 e.this.replace(e.this.this) 5252 else: 5253 e = self._to_prop_eq(e, index) 5254 5255 transformed.append(e) 5256 5257 return transformed 
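# A minimal sketch of the key-value normalization above, assuming the default KEY_VALUE_DEFINITIONS
# (Alias / PropertyEQ / Slice): a DuckDB-style struct literal such as {'a': 1} is parsed by
# _parse_bracket into a Slice('a', 1) pair, which _kv_to_prop_eq then rewrites, so that
#
#     {'a': 1}  ->  Struct(expressions=[PropertyEQ(this=Identifier(a), expression=Literal(1))])
#
# and aliased arguments like STRUCT(1 AS a) and brace literals like {'a': 1} end up in the same shape.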
5258 5259 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5260 return self._parse_column_def(self._parse_id_var()) 5261 5262 def _parse_user_defined_function( 5263 self, kind: t.Optional[TokenType] = None 5264 ) -> t.Optional[exp.Expression]: 5265 this = self._parse_id_var() 5266 5267 while self._match(TokenType.DOT): 5268 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5269 5270 if not self._match(TokenType.L_PAREN): 5271 return this 5272 5273 expressions = self._parse_csv(self._parse_function_parameter) 5274 self._match_r_paren() 5275 return self.expression( 5276 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5277 ) 5278 5279 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5280 literal = self._parse_primary() 5281 if literal: 5282 return self.expression(exp.Introducer, this=token.text, expression=literal) 5283 5284 return self.expression(exp.Identifier, this=token.text) 5285 5286 def _parse_session_parameter(self) -> exp.SessionParameter: 5287 kind = None 5288 this = self._parse_id_var() or self._parse_primary() 5289 5290 if this and self._match(TokenType.DOT): 5291 kind = this.name 5292 this = self._parse_var() or self._parse_primary() 5293 5294 return self.expression(exp.SessionParameter, this=this, kind=kind) 5295 5296 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5297 return self._parse_id_var() 5298 5299 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5300 index = self._index 5301 5302 if self._match(TokenType.L_PAREN): 5303 expressions = t.cast( 5304 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5305 ) 5306 5307 if not self._match(TokenType.R_PAREN): 5308 self._retreat(index) 5309 else: 5310 expressions = [self._parse_lambda_arg()] 5311 5312 if self._match_set(self.LAMBDAS): 5313 return self.LAMBDAS[self._prev.token_type](self, expressions) 5314 5315 self._retreat(index) 5316 5317 this: t.Optional[exp.Expression] 5318 5319 if self._match(TokenType.DISTINCT): 5320 this = self.expression( 5321 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5322 ) 5323 else: 5324 this = self._parse_select_or_expression(alias=alias) 5325 5326 return self._parse_limit( 5327 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5328 ) 5329 5330 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5331 index = self._index 5332 if not self._match(TokenType.L_PAREN): 5333 return this 5334 5335 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5336 # expr can be of both types 5337 if self._match_set(self.SELECT_START_TOKENS): 5338 self._retreat(index) 5339 return this 5340 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5341 self._match_r_paren() 5342 return self.expression(exp.Schema, this=this, expressions=args) 5343 5344 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5345 return self._parse_column_def(self._parse_field(any_token=True)) 5346 5347 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5348 # column defs are not really columns, they're identifiers 5349 if isinstance(this, exp.Column): 5350 this = this.this 5351 5352 kind = self._parse_types(schema=True) 5353 5354 if self._match_text_seq("FOR", "ORDINALITY"): 5355 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5356 5357 constraints: t.List[exp.Expression] = [] 5358 5359 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5360 ("ALIAS", "MATERIALIZED") 5361 ): 5362 persisted = self._prev.text.upper() == "MATERIALIZED" 5363 constraint_kind = exp.ComputedColumnConstraint( 5364 this=self._parse_assignment(), 5365 persisted=persisted or self._match_text_seq("PERSISTED"), 5366 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5367 ) 5368 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5369 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5370 self._match(TokenType.ALIAS) 5371 constraints.append( 5372 self.expression( 5373 exp.ColumnConstraint, 5374 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5375 ) 5376 ) 5377 5378 while True: 5379 constraint = self._parse_column_constraint() 5380 if not constraint: 5381 break 5382 constraints.append(constraint) 5383 5384 if not kind and not constraints: 5385 return this 5386 5387 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5388 5389 def _parse_auto_increment( 5390 self, 5391 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5392 start = None 5393 increment = None 5394 5395 if self._match(TokenType.L_PAREN, advance=False): 5396 args = self._parse_wrapped_csv(self._parse_bitwise) 5397 start = seq_get(args, 0) 5398 increment = seq_get(args, 1) 5399 elif self._match_text_seq("START"): 5400 start = self._parse_bitwise() 5401 self._match_text_seq("INCREMENT") 5402 increment = self._parse_bitwise() 5403 5404 if start and increment: 5405 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5406 5407 return exp.AutoIncrementColumnConstraint() 5408 5409 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5410 if not self._match_text_seq("REFRESH"): 5411 self._retreat(self._index - 1) 5412 return None 5413 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5414 5415 def _parse_compress(self) -> exp.CompressColumnConstraint: 5416 if self._match(TokenType.L_PAREN, advance=False): 5417 return self.expression( 5418 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5419 ) 5420 5421 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5422 5423 def _parse_generated_as_identity( 5424 self, 5425 ) -> ( 5426 exp.GeneratedAsIdentityColumnConstraint 5427 | exp.ComputedColumnConstraint 5428 | exp.GeneratedAsRowColumnConstraint 5429 ): 5430 if self._match_text_seq("BY", "DEFAULT"): 5431 on_null = 
self._match_pair(TokenType.ON, TokenType.NULL) 5432 this = self.expression( 5433 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5434 ) 5435 else: 5436 self._match_text_seq("ALWAYS") 5437 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5438 5439 self._match(TokenType.ALIAS) 5440 5441 if self._match_text_seq("ROW"): 5442 start = self._match_text_seq("START") 5443 if not start: 5444 self._match(TokenType.END) 5445 hidden = self._match_text_seq("HIDDEN") 5446 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5447 5448 identity = self._match_text_seq("IDENTITY") 5449 5450 if self._match(TokenType.L_PAREN): 5451 if self._match(TokenType.START_WITH): 5452 this.set("start", self._parse_bitwise()) 5453 if self._match_text_seq("INCREMENT", "BY"): 5454 this.set("increment", self._parse_bitwise()) 5455 if self._match_text_seq("MINVALUE"): 5456 this.set("minvalue", self._parse_bitwise()) 5457 if self._match_text_seq("MAXVALUE"): 5458 this.set("maxvalue", self._parse_bitwise()) 5459 5460 if self._match_text_seq("CYCLE"): 5461 this.set("cycle", True) 5462 elif self._match_text_seq("NO", "CYCLE"): 5463 this.set("cycle", False) 5464 5465 if not identity: 5466 this.set("expression", self._parse_range()) 5467 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5468 args = self._parse_csv(self._parse_bitwise) 5469 this.set("start", seq_get(args, 0)) 5470 this.set("increment", seq_get(args, 1)) 5471 5472 self._match_r_paren() 5473 5474 return this 5475 5476 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5477 self._match_text_seq("LENGTH") 5478 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5479 5480 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5481 if self._match_text_seq("NULL"): 5482 return self.expression(exp.NotNullColumnConstraint) 5483 if self._match_text_seq("CASESPECIFIC"): 5484 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5485 if self._match_text_seq("FOR", "REPLICATION"): 5486 return self.expression(exp.NotForReplicationColumnConstraint) 5487 5488 # Unconsume the `NOT` token 5489 self._retreat(self._index - 1) 5490 return None 5491 5492 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5493 if self._match(TokenType.CONSTRAINT): 5494 this = self._parse_id_var() 5495 else: 5496 this = None 5497 5498 if self._match_texts(self.CONSTRAINT_PARSERS): 5499 return self.expression( 5500 exp.ColumnConstraint, 5501 this=this, 5502 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5503 ) 5504 5505 return this 5506 5507 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5508 if not self._match(TokenType.CONSTRAINT): 5509 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5510 5511 return self.expression( 5512 exp.Constraint, 5513 this=self._parse_id_var(), 5514 expressions=self._parse_unnamed_constraints(), 5515 ) 5516 5517 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5518 constraints = [] 5519 while True: 5520 constraint = self._parse_unnamed_constraint() or self._parse_function() 5521 if not constraint: 5522 break 5523 constraints.append(constraint) 5524 5525 return constraints 5526 5527 def _parse_unnamed_constraint( 5528 self, constraints: t.Optional[t.Collection[str]] = None 5529 ) -> t.Optional[exp.Expression]: 5530 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5531 constraints or 
self.CONSTRAINT_PARSERS 5532 ): 5533 return None 5534 5535 constraint = self._prev.text.upper() 5536 if constraint not in self.CONSTRAINT_PARSERS: 5537 self.raise_error(f"No parser found for schema constraint {constraint}.") 5538 5539 return self.CONSTRAINT_PARSERS[constraint](self) 5540 5541 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5542 return self._parse_id_var(any_token=False) 5543 5544 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5545 self._match_text_seq("KEY") 5546 return self.expression( 5547 exp.UniqueColumnConstraint, 5548 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5549 this=self._parse_schema(self._parse_unique_key()), 5550 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5551 on_conflict=self._parse_on_conflict(), 5552 ) 5553 5554 def _parse_key_constraint_options(self) -> t.List[str]: 5555 options = [] 5556 while True: 5557 if not self._curr: 5558 break 5559 5560 if self._match(TokenType.ON): 5561 action = None 5562 on = self._advance_any() and self._prev.text 5563 5564 if self._match_text_seq("NO", "ACTION"): 5565 action = "NO ACTION" 5566 elif self._match_text_seq("CASCADE"): 5567 action = "CASCADE" 5568 elif self._match_text_seq("RESTRICT"): 5569 action = "RESTRICT" 5570 elif self._match_pair(TokenType.SET, TokenType.NULL): 5571 action = "SET NULL" 5572 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5573 action = "SET DEFAULT" 5574 else: 5575 self.raise_error("Invalid key constraint") 5576 5577 options.append(f"ON {on} {action}") 5578 else: 5579 var = self._parse_var_from_options( 5580 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5581 ) 5582 if not var: 5583 break 5584 options.append(var.name) 5585 5586 return options 5587 5588 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5589 if match and not self._match(TokenType.REFERENCES): 5590 return None 5591 5592 expressions = None 5593 this = self._parse_table(schema=True) 5594 options = self._parse_key_constraint_options() 5595 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5596 5597 def _parse_foreign_key(self) -> exp.ForeignKey: 5598 expressions = self._parse_wrapped_id_vars() 5599 reference = self._parse_references() 5600 options = {} 5601 5602 while self._match(TokenType.ON): 5603 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5604 self.raise_error("Expected DELETE or UPDATE") 5605 5606 kind = self._prev.text.lower() 5607 5608 if self._match_text_seq("NO", "ACTION"): 5609 action = "NO ACTION" 5610 elif self._match(TokenType.SET): 5611 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5612 action = "SET " + self._prev.text.upper() 5613 else: 5614 self._advance() 5615 action = self._prev.text.upper() 5616 5617 options[kind] = action 5618 5619 return self.expression( 5620 exp.ForeignKey, 5621 expressions=expressions, 5622 reference=reference, 5623 **options, # type: ignore 5624 ) 5625 5626 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5627 return self._parse_field() 5628 5629 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5630 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5631 self._retreat(self._index - 1) 5632 return None 5633 5634 id_vars = self._parse_wrapped_id_vars() 5635 return self.expression( 5636 exp.PeriodForSystemTimeConstraint, 5637 this=seq_get(id_vars, 0), 5638 expression=seq_get(id_vars, 1), 5639 ) 5640 5641 def _parse_primary_key( 5642 self, wrapped_optional: bool 
= False, in_props: bool = False 5643 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5644 desc = ( 5645 self._match_set((TokenType.ASC, TokenType.DESC)) 5646 and self._prev.token_type == TokenType.DESC 5647 ) 5648 5649 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5650 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5651 5652 expressions = self._parse_wrapped_csv( 5653 self._parse_primary_key_part, optional=wrapped_optional 5654 ) 5655 options = self._parse_key_constraint_options() 5656 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5657 5658 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5659 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5660 5661 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5662 """ 5663 Parses a datetime literal in ODBC format. The literal is parsed into the corresponding 5664 expression type; for example, `{d'yyyy-mm-dd'}` is parsed as a `Date` node, exactly 5665 as `DATE('yyyy-mm-dd')` would be. 5666 5667 Reference: 5668 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5669 """ 5670 self._match(TokenType.VAR) 5671 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5672 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5673 if not self._match(TokenType.R_BRACE): 5674 self.raise_error("Expected }") 5675 return expression 5676 5677 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5678 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5679 return this 5680 5681 bracket_kind = self._prev.token_type 5682 if ( 5683 bracket_kind == TokenType.L_BRACE 5684 and self._curr 5685 and self._curr.token_type == TokenType.VAR 5686 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5687 ): 5688 return self._parse_odbc_datetime_literal() 5689 5690 expressions = self._parse_csv( 5691 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5692 ) 5693 5694 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5695 self.raise_error("Expected ]") 5696 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5697 self.raise_error("Expected }") 5698 5699 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5700 if bracket_kind == TokenType.L_BRACE: 5701 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5702 elif not this: 5703 this = build_array_constructor( 5704 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5705 ) 5706 else: 5707 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5708 if constructor_type: 5709 return build_array_constructor( 5710 constructor_type, 5711 args=expressions, 5712 bracket_kind=bracket_kind, 5713 dialect=self.dialect, 5714 ) 5715 5716 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5717 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5718 5719 self._add_comments(this) 5720 return self._parse_bracket(this) 5721 5722 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5723 if self._match(TokenType.COLON): 5724 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5725 return this 5726 5727 def _parse_case(self) -> t.Optional[exp.Expression]: 5728 ifs
= [] 5729 default = None 5730 5731 comments = self._prev_comments 5732 expression = self._parse_assignment() 5733 5734 while self._match(TokenType.WHEN): 5735 this = self._parse_assignment() 5736 self._match(TokenType.THEN) 5737 then = self._parse_assignment() 5738 ifs.append(self.expression(exp.If, this=this, true=then)) 5739 5740 if self._match(TokenType.ELSE): 5741 default = self._parse_assignment() 5742 5743 if not self._match(TokenType.END): 5744 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5745 default = exp.column("interval") 5746 else: 5747 self.raise_error("Expected END after CASE", self._prev) 5748 5749 return self.expression( 5750 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5751 ) 5752 5753 def _parse_if(self) -> t.Optional[exp.Expression]: 5754 if self._match(TokenType.L_PAREN): 5755 args = self._parse_csv(self._parse_assignment) 5756 this = self.validate_expression(exp.If.from_arg_list(args), args) 5757 self._match_r_paren() 5758 else: 5759 index = self._index - 1 5760 5761 if self.NO_PAREN_IF_COMMANDS and index == 0: 5762 return self._parse_as_command(self._prev) 5763 5764 condition = self._parse_assignment() 5765 5766 if not condition: 5767 self._retreat(index) 5768 return None 5769 5770 self._match(TokenType.THEN) 5771 true = self._parse_assignment() 5772 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5773 self._match(TokenType.END) 5774 this = self.expression(exp.If, this=condition, true=true, false=false) 5775 5776 return this 5777 5778 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5779 if not self._match_text_seq("VALUE", "FOR"): 5780 self._retreat(self._index - 1) 5781 return None 5782 5783 return self.expression( 5784 exp.NextValueFor, 5785 this=self._parse_column(), 5786 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5787 ) 5788 5789 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5790 this = self._parse_function() or self._parse_var_or_string(upper=True) 5791 5792 if self._match(TokenType.FROM): 5793 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5794 5795 if not self._match(TokenType.COMMA): 5796 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5797 5798 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5799 5800 def _parse_gap_fill(self) -> exp.GapFill: 5801 self._match(TokenType.TABLE) 5802 this = self._parse_table() 5803 5804 self._match(TokenType.COMMA) 5805 args = [this, *self._parse_csv(self._parse_lambda)] 5806 5807 gap_fill = exp.GapFill.from_arg_list(args) 5808 return self.validate_expression(gap_fill, args) 5809 5810 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5811 this = self._parse_assignment() 5812 5813 if not self._match(TokenType.ALIAS): 5814 if self._match(TokenType.COMMA): 5815 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5816 5817 self.raise_error("Expected AS after CAST") 5818 5819 fmt = None 5820 to = self._parse_types() 5821 5822 if self._match(TokenType.FORMAT): 5823 fmt_string = self._parse_string() 5824 fmt = self._parse_at_time_zone(fmt_string) 5825 5826 if not to: 5827 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5828 if to.this in exp.DataType.TEMPORAL_TYPES: 5829 this = self.expression( 5830 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5831 this=this, 5832 format=exp.Literal.string( 5833 format_time( 5834 
fmt_string.this if fmt_string else "", 5835 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5836 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5837 ) 5838 ), 5839 safe=safe, 5840 ) 5841 5842 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5843 this.set("zone", fmt.args["zone"]) 5844 return this 5845 elif not to: 5846 self.raise_error("Expected TYPE after CAST") 5847 elif isinstance(to, exp.Identifier): 5848 to = exp.DataType.build(to.name, udt=True) 5849 elif to.this == exp.DataType.Type.CHAR: 5850 if self._match(TokenType.CHARACTER_SET): 5851 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5852 5853 return self.expression( 5854 exp.Cast if strict else exp.TryCast, 5855 this=this, 5856 to=to, 5857 format=fmt, 5858 safe=safe, 5859 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5860 ) 5861 5862 def _parse_string_agg(self) -> exp.Expression: 5863 if self._match(TokenType.DISTINCT): 5864 args: t.List[t.Optional[exp.Expression]] = [ 5865 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5866 ] 5867 if self._match(TokenType.COMMA): 5868 args.extend(self._parse_csv(self._parse_assignment)) 5869 else: 5870 args = self._parse_csv(self._parse_assignment) # type: ignore 5871 5872 index = self._index 5873 if not self._match(TokenType.R_PAREN) and args: 5874 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5875 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5876 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5877 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5878 5879 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5880 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5881 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5882 if not self._match_text_seq("WITHIN", "GROUP"): 5883 self._retreat(index) 5884 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5885 5886 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5887 order = self._parse_order(this=seq_get(args, 0)) 5888 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5889 5890 def _parse_convert( 5891 self, strict: bool, safe: t.Optional[bool] = None 5892 ) -> t.Optional[exp.Expression]: 5893 this = self._parse_bitwise() 5894 5895 if self._match(TokenType.USING): 5896 to: t.Optional[exp.Expression] = self.expression( 5897 exp.CharacterSet, this=self._parse_var() 5898 ) 5899 elif self._match(TokenType.COMMA): 5900 to = self._parse_types() 5901 else: 5902 to = None 5903 5904 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5905 5906 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5907 """ 5908 There are generally two variants of the DECODE function: 5909 5910 - DECODE(bin, charset) 5911 - DECODE(expression, search, result [, search, result] ... [, default]) 5912 5913 The second variant will always be parsed into a CASE expression. Note that NULL 5914 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5915 instead of relying on pattern matching. 
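For example (illustrative), `DECODE(x, 1, 'one', NULL, 'missing', 'other')` is parsed
as the equivalent of `CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE
'other' END`. A quick sketch (exact output may vary by version):

    >>> import sqlglot
    >>> sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other')").sql()
    "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END"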
5916 """ 5917 args = self._parse_csv(self._parse_assignment) 5918 5919 if len(args) < 3: 5920 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5921 5922 expression, *expressions = args 5923 if not expression: 5924 return None 5925 5926 ifs = [] 5927 for search, result in zip(expressions[::2], expressions[1::2]): 5928 if not search or not result: 5929 return None 5930 5931 if isinstance(search, exp.Literal): 5932 ifs.append( 5933 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5934 ) 5935 elif isinstance(search, exp.Null): 5936 ifs.append( 5937 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5938 ) 5939 else: 5940 cond = exp.or_( 5941 exp.EQ(this=expression.copy(), expression=search), 5942 exp.and_( 5943 exp.Is(this=expression.copy(), expression=exp.Null()), 5944 exp.Is(this=search.copy(), expression=exp.Null()), 5945 copy=False, 5946 ), 5947 copy=False, 5948 ) 5949 ifs.append(exp.If(this=cond, true=result)) 5950 5951 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5952 5953 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5954 self._match_text_seq("KEY") 5955 key = self._parse_column() 5956 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5957 self._match_text_seq("VALUE") 5958 value = self._parse_bitwise() 5959 5960 if not key and not value: 5961 return None 5962 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5963 5964 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5965 if not this or not self._match_text_seq("FORMAT", "JSON"): 5966 return this 5967 5968 return self.expression(exp.FormatJson, this=this) 5969 5970 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 5971 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 5972 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 5973 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5974 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5975 else: 5976 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5977 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5978 5979 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 5980 5981 if not empty and not error and not null: 5982 return None 5983 5984 return self.expression( 5985 exp.OnCondition, 5986 empty=empty, 5987 error=error, 5988 null=null, 5989 ) 5990 5991 def _parse_on_handling( 5992 self, on: str, *values: str 5993 ) -> t.Optional[str] | t.Optional[exp.Expression]: 5994 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 5995 for value in values: 5996 if self._match_text_seq(value, "ON", on): 5997 return f"{value} ON {on}" 5998 5999 index = self._index 6000 if self._match(TokenType.DEFAULT): 6001 default_value = self._parse_bitwise() 6002 if self._match_text_seq("ON", on): 6003 return default_value 6004 6005 self._retreat(index) 6006 6007 return None 6008 6009 @t.overload 6010 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6011 6012 @t.overload 6013 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6014 6015 def _parse_json_object(self, agg=False): 6016 star = self._parse_star() 6017 expressions = ( 6018 [star] 6019 if star 6020 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6021 ) 6022 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6023 6024 unique_keys = None 6025 if self._match_text_seq("WITH", "UNIQUE"): 6026 unique_keys = True 6027 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6028 unique_keys = False 6029 6030 self._match_text_seq("KEYS") 6031 6032 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6033 self._parse_type() 6034 ) 6035 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6036 6037 return self.expression( 6038 exp.JSONObjectAgg if agg else exp.JSONObject, 6039 expressions=expressions, 6040 null_handling=null_handling, 6041 unique_keys=unique_keys, 6042 return_type=return_type, 6043 encoding=encoding, 6044 ) 6045 6046 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6047 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6048 if not self._match_text_seq("NESTED"): 6049 this = self._parse_id_var() 6050 kind = self._parse_types(allow_identifiers=False) 6051 nested = None 6052 else: 6053 this = None 6054 kind = None 6055 nested = True 6056 6057 path = self._match_text_seq("PATH") and self._parse_string() 6058 nested_schema = nested and self._parse_json_schema() 6059 6060 return self.expression( 6061 exp.JSONColumnDef, 6062 this=this, 6063 kind=kind, 6064 path=path, 6065 nested_schema=nested_schema, 6066 ) 6067 6068 def _parse_json_schema(self) -> exp.JSONSchema: 6069 self._match_text_seq("COLUMNS") 6070 return self.expression( 6071 exp.JSONSchema, 6072 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6073 ) 6074 6075 def _parse_json_table(self) -> exp.JSONTable: 6076 this = self._parse_format_json(self._parse_bitwise()) 6077 path = self._match(TokenType.COMMA) and self._parse_string() 6078 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6079 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6080 schema = self._parse_json_schema() 6081 6082 return exp.JSONTable( 6083 this=this, 6084 schema=schema, 6085 path=path, 6086 error_handling=error_handling, 6087 empty_handling=empty_handling, 6088 ) 6089 6090 def _parse_match_against(self) -> exp.MatchAgainst: 6091 expressions = self._parse_csv(self._parse_column) 6092 6093 self._match_text_seq(")", "AGAINST", "(") 6094 6095 this = self._parse_string() 6096 6097 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6098 modifier = "IN NATURAL LANGUAGE MODE" 6099 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6100 modifier = f"{modifier} WITH QUERY EXPANSION" 6101 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6102 modifier = "IN BOOLEAN MODE" 6103 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6104 modifier = "WITH QUERY EXPANSION" 6105 else: 6106 modifier = None 6107 6108 return self.expression( 6109 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6110 ) 6111 6112 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6113 def _parse_open_json(self) -> exp.OpenJSON: 6114 this = self._parse_bitwise() 6115 path = self._match(TokenType.COMMA) and self._parse_string() 6116 6117 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6118 this = self._parse_field(any_token=True) 6119 kind = self._parse_types() 6120 path = 
self._parse_string() 6121 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6122 6123 return self.expression( 6124 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6125 ) 6126 6127 expressions = None 6128 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6129 self._match_l_paren() 6130 expressions = self._parse_csv(_parse_open_json_column_def) 6131 6132 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6133 6134 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6135 args = self._parse_csv(self._parse_bitwise) 6136 6137 if self._match(TokenType.IN): 6138 return self.expression( 6139 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6140 ) 6141 6142 if haystack_first: 6143 haystack = seq_get(args, 0) 6144 needle = seq_get(args, 1) 6145 else: 6146 needle = seq_get(args, 0) 6147 haystack = seq_get(args, 1) 6148 6149 return self.expression( 6150 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6151 ) 6152 6153 def _parse_predict(self) -> exp.Predict: 6154 self._match_text_seq("MODEL") 6155 this = self._parse_table() 6156 6157 self._match(TokenType.COMMA) 6158 self._match_text_seq("TABLE") 6159 6160 return self.expression( 6161 exp.Predict, 6162 this=this, 6163 expression=self._parse_table(), 6164 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6165 ) 6166 6167 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6168 args = self._parse_csv(self._parse_table) 6169 return exp.JoinHint(this=func_name.upper(), expressions=args) 6170 6171 def _parse_substring(self) -> exp.Substring: 6172 # Postgres supports the form: substring(string [from int] [for int]) 6173 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6174 6175 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6176 6177 if self._match(TokenType.FROM): 6178 args.append(self._parse_bitwise()) 6179 if self._match(TokenType.FOR): 6180 if len(args) == 1: 6181 args.append(exp.Literal.number(1)) 6182 args.append(self._parse_bitwise()) 6183 6184 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6185 6186 def _parse_trim(self) -> exp.Trim: 6187 # https://www.w3resource.com/sql/character-functions/trim.php 6188 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6189 6190 position = None 6191 collation = None 6192 expression = None 6193 6194 if self._match_texts(self.TRIM_TYPES): 6195 position = self._prev.text.upper() 6196 6197 this = self._parse_bitwise() 6198 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6199 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6200 expression = self._parse_bitwise() 6201 6202 if invert_order: 6203 this, expression = expression, this 6204 6205 if self._match(TokenType.COLLATE): 6206 collation = self._parse_bitwise() 6207 6208 return self.expression( 6209 exp.Trim, this=this, position=position, expression=expression, collation=collation 6210 ) 6211 6212 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6213 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6214 6215 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6216 return self._parse_window(self._parse_id_var(), alias=True) 6217 6218 def _parse_respect_or_ignore_nulls( 6219 self, this: t.Optional[exp.Expression] 6220 ) -> t.Optional[exp.Expression]: 6221 if self._match_text_seq("IGNORE", "NULLS"): 
6222 return self.expression(exp.IgnoreNulls, this=this) 6223 if self._match_text_seq("RESPECT", "NULLS"): 6224 return self.expression(exp.RespectNulls, this=this) 6225 return this 6226 6227 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6228 if self._match(TokenType.HAVING): 6229 self._match_texts(("MAX", "MIN")) 6230 max = self._prev.text.upper() != "MIN" 6231 return self.expression( 6232 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6233 ) 6234 6235 return this 6236 6237 def _parse_window( 6238 self, this: t.Optional[exp.Expression], alias: bool = False 6239 ) -> t.Optional[exp.Expression]: 6240 func = this 6241 comments = func.comments if isinstance(func, exp.Expression) else None 6242 6243 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6244 self._match(TokenType.WHERE) 6245 this = self.expression( 6246 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6247 ) 6248 self._match_r_paren() 6249 6250 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6251 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6252 if self._match_text_seq("WITHIN", "GROUP"): 6253 order = self._parse_wrapped(self._parse_order) 6254 this = self.expression(exp.WithinGroup, this=this, expression=order) 6255 6256 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6257 # Some dialects choose to implement and some do not. 6258 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6259 6260 # There is some code above in _parse_lambda that handles 6261 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6262 6263 # The below changes handle 6264 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6265 6266 # Oracle allows both formats 6267 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6268 # and Snowflake chose to do the same for familiarity 6269 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6270 if isinstance(this, exp.AggFunc): 6271 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6272 6273 if ignore_respect and ignore_respect is not this: 6274 ignore_respect.replace(ignore_respect.this) 6275 this = self.expression(ignore_respect.__class__, this=this) 6276 6277 this = self._parse_respect_or_ignore_nulls(this) 6278 6279 # bigquery select from window x AS (partition by ...) 
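# e.g. (illustrative) SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y ORDER BY z)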
6280 if alias: 6281 over = None 6282 self._match(TokenType.ALIAS) 6283 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6284 return this 6285 else: 6286 over = self._prev.text.upper() 6287 6288 if comments and isinstance(func, exp.Expression): 6289 func.pop_comments() 6290 6291 if not self._match(TokenType.L_PAREN): 6292 return self.expression( 6293 exp.Window, 6294 comments=comments, 6295 this=this, 6296 alias=self._parse_id_var(False), 6297 over=over, 6298 ) 6299 6300 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6301 6302 first = self._match(TokenType.FIRST) 6303 if self._match_text_seq("LAST"): 6304 first = False 6305 6306 partition, order = self._parse_partition_and_order() 6307 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6308 6309 if kind: 6310 self._match(TokenType.BETWEEN) 6311 start = self._parse_window_spec() 6312 self._match(TokenType.AND) 6313 end = self._parse_window_spec() 6314 6315 spec = self.expression( 6316 exp.WindowSpec, 6317 kind=kind, 6318 start=start["value"], 6319 start_side=start["side"], 6320 end=end["value"], 6321 end_side=end["side"], 6322 ) 6323 else: 6324 spec = None 6325 6326 self._match_r_paren() 6327 6328 window = self.expression( 6329 exp.Window, 6330 comments=comments, 6331 this=this, 6332 partition_by=partition, 6333 order=order, 6334 spec=spec, 6335 alias=window_alias, 6336 over=over, 6337 first=first, 6338 ) 6339 6340 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6341 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6342 return self._parse_window(window, alias=alias) 6343 6344 return window 6345 6346 def _parse_partition_and_order( 6347 self, 6348 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6349 return self._parse_partition_by(), self._parse_order() 6350 6351 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6352 self._match(TokenType.BETWEEN) 6353 6354 return { 6355 "value": ( 6356 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6357 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6358 or self._parse_bitwise() 6359 ), 6360 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6361 } 6362 6363 def _parse_alias( 6364 self, this: t.Optional[exp.Expression], explicit: bool = False 6365 ) -> t.Optional[exp.Expression]: 6366 any_token = self._match(TokenType.ALIAS) 6367 comments = self._prev_comments or [] 6368 6369 if explicit and not any_token: 6370 return this 6371 6372 if self._match(TokenType.L_PAREN): 6373 aliases = self.expression( 6374 exp.Aliases, 6375 comments=comments, 6376 this=this, 6377 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6378 ) 6379 self._match_r_paren(aliases) 6380 return aliases 6381 6382 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6383 self.STRING_ALIASES and self._parse_string_as_identifier() 6384 ) 6385 6386 if alias: 6387 comments.extend(alias.pop_comments()) 6388 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6389 column = this.this 6390 6391 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6392 if not this.comments and column and column.comments: 6393 this.comments = column.pop_comments() 6394 6395 return this 6396 6397 def _parse_id_var( 6398 self, 6399 any_token: bool = True, 6400 tokens: t.Optional[t.Collection[TokenType]] = None, 6401 ) -> t.Optional[exp.Expression]: 6402 expression = self._parse_identifier() 6403 if 
not expression and ( 6404 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6405 ): 6406 quoted = self._prev.token_type == TokenType.STRING 6407 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6408 6409 return expression 6410 6411 def _parse_string(self) -> t.Optional[exp.Expression]: 6412 if self._match_set(self.STRING_PARSERS): 6413 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6414 return self._parse_placeholder() 6415 6416 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6417 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6418 6419 def _parse_number(self) -> t.Optional[exp.Expression]: 6420 if self._match_set(self.NUMERIC_PARSERS): 6421 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6422 return self._parse_placeholder() 6423 6424 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6425 if self._match(TokenType.IDENTIFIER): 6426 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6427 return self._parse_placeholder() 6428 6429 def _parse_var( 6430 self, 6431 any_token: bool = False, 6432 tokens: t.Optional[t.Collection[TokenType]] = None, 6433 upper: bool = False, 6434 ) -> t.Optional[exp.Expression]: 6435 if ( 6436 (any_token and self._advance_any()) 6437 or self._match(TokenType.VAR) 6438 or (self._match_set(tokens) if tokens else False) 6439 ): 6440 return self.expression( 6441 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6442 ) 6443 return self._parse_placeholder() 6444 6445 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6446 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6447 self._advance() 6448 return self._prev 6449 return None 6450 6451 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6452 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6453 6454 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6455 return self._parse_primary() or self._parse_var(any_token=True) 6456 6457 def _parse_null(self) -> t.Optional[exp.Expression]: 6458 if self._match_set(self.NULL_TOKENS): 6459 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6460 return self._parse_placeholder() 6461 6462 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6463 if self._match(TokenType.TRUE): 6464 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6465 if self._match(TokenType.FALSE): 6466 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6467 return self._parse_placeholder() 6468 6469 def _parse_star(self) -> t.Optional[exp.Expression]: 6470 if self._match(TokenType.STAR): 6471 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6472 return self._parse_placeholder() 6473 6474 def _parse_parameter(self) -> exp.Parameter: 6475 this = self._parse_identifier() or self._parse_primary_or_var() 6476 return self.expression(exp.Parameter, this=this) 6477 6478 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6479 if self._match_set(self.PLACEHOLDER_PARSERS): 6480 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6481 if placeholder: 6482 return placeholder 6483 self._advance(-1) 6484 return None 6485 6486 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6487 if not self._match_texts(keywords): 6488 return None 6489 if self._match(TokenType.L_PAREN, 
advance=False): 6490 return self._parse_wrapped_csv(self._parse_expression) 6491 6492 expression = self._parse_expression() 6493 return [expression] if expression else None 6494 6495 def _parse_csv( 6496 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6497 ) -> t.List[exp.Expression]: 6498 parse_result = parse_method() 6499 items = [parse_result] if parse_result is not None else [] 6500 6501 while self._match(sep): 6502 self._add_comments(parse_result) 6503 parse_result = parse_method() 6504 if parse_result is not None: 6505 items.append(parse_result) 6506 6507 return items 6508 6509 def _parse_tokens( 6510 self, parse_method: t.Callable, expressions: t.Dict 6511 ) -> t.Optional[exp.Expression]: 6512 this = parse_method() 6513 6514 while self._match_set(expressions): 6515 this = self.expression( 6516 expressions[self._prev.token_type], 6517 this=this, 6518 comments=self._prev_comments, 6519 expression=parse_method(), 6520 ) 6521 6522 return this 6523 6524 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6525 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6526 6527 def _parse_wrapped_csv( 6528 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6529 ) -> t.List[exp.Expression]: 6530 return self._parse_wrapped( 6531 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6532 ) 6533 6534 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6535 wrapped = self._match(TokenType.L_PAREN) 6536 if not wrapped and not optional: 6537 self.raise_error("Expecting (") 6538 parse_result = parse_method() 6539 if wrapped: 6540 self._match_r_paren() 6541 return parse_result 6542 6543 def _parse_expressions(self) -> t.List[exp.Expression]: 6544 return self._parse_csv(self._parse_expression) 6545 6546 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6547 return self._parse_select() or self._parse_set_operations( 6548 self._parse_expression() if alias else self._parse_assignment() 6549 ) 6550 6551 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6552 return self._parse_query_modifiers( 6553 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6554 ) 6555 6556 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6557 this = None 6558 if self._match_texts(self.TRANSACTION_KIND): 6559 this = self._prev.text 6560 6561 self._match_texts(("TRANSACTION", "WORK")) 6562 6563 modes = [] 6564 while True: 6565 mode = [] 6566 while self._match(TokenType.VAR): 6567 mode.append(self._prev.text) 6568 6569 if mode: 6570 modes.append(" ".join(mode)) 6571 if not self._match(TokenType.COMMA): 6572 break 6573 6574 return self.expression(exp.Transaction, this=this, modes=modes) 6575 6576 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6577 chain = None 6578 savepoint = None 6579 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6580 6581 self._match_texts(("TRANSACTION", "WORK")) 6582 6583 if self._match_text_seq("TO"): 6584 self._match_text_seq("SAVEPOINT") 6585 savepoint = self._parse_id_var() 6586 6587 if self._match(TokenType.AND): 6588 chain = not self._match_text_seq("NO") 6589 self._match_text_seq("CHAIN") 6590 6591 if is_rollback: 6592 return self.expression(exp.Rollback, savepoint=savepoint) 6593 6594 return self.expression(exp.Commit, chain=chain) 6595 6596 def _parse_refresh(self) -> exp.Refresh: 6597 self._match(TokenType.TABLE) 6598 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6599 6600 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6601 if not self._match_text_seq("ADD"): 6602 return None 6603 6604 self._match(TokenType.COLUMN) 6605 exists_column = self._parse_exists(not_=True) 6606 expression = self._parse_field_def() 6607 6608 if expression: 6609 expression.set("exists", exists_column) 6610 6611 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6612 if self._match_texts(("FIRST", "AFTER")): 6613 position = self._prev.text 6614 column_position = self.expression( 6615 exp.ColumnPosition, this=self._parse_column(), position=position 6616 ) 6617 expression.set("position", column_position) 6618 6619 return expression 6620 6621 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6622 drop = self._match(TokenType.DROP) and self._parse_drop() 6623 if drop and not isinstance(drop, exp.Command): 6624 drop.set("kind", drop.args.get("kind", "COLUMN")) 6625 return drop 6626 6627 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6628 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6629 return self.expression( 6630 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6631 ) 6632 6633 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6634 index = self._index - 1 6635 6636 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6637 return self._parse_csv( 6638 lambda: self.expression( 6639 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6640 ) 6641 ) 6642 6643 self._retreat(index) 6644 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6645 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6646 6647 if self._match_text_seq("ADD", "COLUMNS"): 6648 schema = self._parse_schema() 6649 if schema: 6650 return [schema] 6651 return [] 6652 6653 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6654 6655 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6656 if self._match_texts(self.ALTER_ALTER_PARSERS): 6657 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6658 6659 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6660 # keyword after ALTER we default to parsing this statement 6661 self._match(TokenType.COLUMN) 6662 column = self._parse_field(any_token=True) 6663 6664 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6665 return self.expression(exp.AlterColumn, this=column, drop=True) 6666 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6667 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6668 if self._match(TokenType.COMMENT): 6669 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6670 if self._match_text_seq("DROP", "NOT", "NULL"): 6671 return self.expression( 6672 exp.AlterColumn, 6673 this=column, 6674 drop=True, 6675 allow_null=True, 6676 ) 6677 if self._match_text_seq("SET", "NOT", "NULL"): 6678 return self.expression( 6679 exp.AlterColumn, 6680 this=column, 6681 allow_null=False, 6682 ) 6683 self._match_text_seq("SET", "DATA") 6684 self._match_text_seq("TYPE") 6685 return self.expression( 6686 exp.AlterColumn, 6687 this=column, 6688 dtype=self._parse_types(), 6689 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6690 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6691 ) 6692 6693 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6694 if self._match_texts(("ALL", "EVEN", "AUTO")): 6695 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6696 6697 self._match_text_seq("KEY", "DISTKEY") 6698 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6699 6700 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6701 if compound: 6702 self._match_text_seq("SORTKEY") 6703 6704 if self._match(TokenType.L_PAREN, advance=False): 6705 return self.expression( 6706 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6707 ) 6708 6709 self._match_texts(("AUTO", "NONE")) 6710 return self.expression( 6711 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6712 ) 6713 6714 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6715 index = self._index - 1 6716 6717 partition_exists = self._parse_exists() 6718 if self._match(TokenType.PARTITION, advance=False): 6719 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6720 6721 self._retreat(index) 6722 return self._parse_csv(self._parse_drop_column) 6723 6724 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6725 if self._match(TokenType.COLUMN): 6726 exists = self._parse_exists() 6727 old_column = self._parse_column() 6728 to = self._match_text_seq("TO") 6729 new_column = self._parse_column() 6730 6731 if old_column is None or to is None or new_column is None: 6732 return None 6733 6734 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6735 6736 self._match_text_seq("TO") 6737 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6738 6739 def _parse_alter_table_set(self) -> exp.AlterSet: 6740 alter_set = self.expression(exp.AlterSet) 6741 6742 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6743 "TABLE", "PROPERTIES" 6744 ): 6745 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6746 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6747 alter_set.set("expressions", [self._parse_assignment()]) 6748 elif self._match_texts(("LOGGED", "UNLOGGED")): 6749 alter_set.set("option", exp.var(self._prev.text.upper())) 6750 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6751 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6752 elif self._match_text_seq("LOCATION"): 6753 alter_set.set("location", self._parse_field()) 6754 elif self._match_text_seq("ACCESS", "METHOD"): 6755 alter_set.set("access_method", self._parse_field()) 6756 elif self._match_text_seq("TABLESPACE"): 6757 alter_set.set("tablespace", self._parse_field()) 6758 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6759 alter_set.set("file_format", [self._parse_field()]) 6760 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6761 alter_set.set("file_format", self._parse_wrapped_options()) 6762 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6763 alter_set.set("copy_options", self._parse_wrapped_options()) 6764 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6765 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6766 else: 6767 if self._match_text_seq("SERDE"): 6768 alter_set.set("serde", self._parse_field()) 6769 6770 alter_set.set("expressions", [self._parse_properties()]) 6771 6772 return 
alter_set 6773 6774 def _parse_alter(self) -> exp.Alter | exp.Command: 6775 start = self._prev 6776 6777 alter_token = self._match_set(self.ALTERABLES) and self._prev 6778 if not alter_token: 6779 return self._parse_as_command(start) 6780 6781 exists = self._parse_exists() 6782 only = self._match_text_seq("ONLY") 6783 this = self._parse_table(schema=True) 6784 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6785 6786 if self._next: 6787 self._advance() 6788 6789 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6790 if parser: 6791 actions = ensure_list(parser(self)) 6792 not_valid = self._match_text_seq("NOT", "VALID") 6793 options = self._parse_csv(self._parse_property) 6794 6795 if not self._curr and actions: 6796 return self.expression( 6797 exp.Alter, 6798 this=this, 6799 kind=alter_token.text.upper(), 6800 exists=exists, 6801 actions=actions, 6802 only=only, 6803 options=options, 6804 cluster=cluster, 6805 not_valid=not_valid, 6806 ) 6807 6808 return self._parse_as_command(start) 6809 6810 def _parse_merge(self) -> exp.Merge: 6811 self._match(TokenType.INTO) 6812 target = self._parse_table() 6813 6814 if target and self._match(TokenType.ALIAS, advance=False): 6815 target.set("alias", self._parse_table_alias()) 6816 6817 self._match(TokenType.USING) 6818 using = self._parse_table() 6819 6820 self._match(TokenType.ON) 6821 on = self._parse_assignment() 6822 6823 return self.expression( 6824 exp.Merge, 6825 this=target, 6826 using=using, 6827 on=on, 6828 expressions=self._parse_when_matched(), 6829 ) 6830 6831 def _parse_when_matched(self) -> t.List[exp.When]: 6832 whens = [] 6833 6834 while self._match(TokenType.WHEN): 6835 matched = not self._match(TokenType.NOT) 6836 self._match_text_seq("MATCHED") 6837 source = ( 6838 False 6839 if self._match_text_seq("BY", "TARGET") 6840 else self._match_text_seq("BY", "SOURCE") 6841 ) 6842 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6843 6844 self._match(TokenType.THEN) 6845 6846 if self._match(TokenType.INSERT): 6847 _this = self._parse_star() 6848 if _this: 6849 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6850 else: 6851 then = self.expression( 6852 exp.Insert, 6853 this=self._parse_value(), 6854 expression=self._match_text_seq("VALUES") and self._parse_value(), 6855 ) 6856 elif self._match(TokenType.UPDATE): 6857 expressions = self._parse_star() 6858 if expressions: 6859 then = self.expression(exp.Update, expressions=expressions) 6860 else: 6861 then = self.expression( 6862 exp.Update, 6863 expressions=self._match(TokenType.SET) 6864 and self._parse_csv(self._parse_equality), 6865 ) 6866 elif self._match(TokenType.DELETE): 6867 then = self.expression(exp.Var, this=self._prev.text) 6868 else: 6869 then = None 6870 6871 whens.append( 6872 self.expression( 6873 exp.When, 6874 matched=matched, 6875 source=source, 6876 condition=condition, 6877 then=then, 6878 ) 6879 ) 6880 return whens 6881 6882 def _parse_show(self) -> t.Optional[exp.Expression]: 6883 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6884 if parser: 6885 return parser(self) 6886 return self._parse_as_command(self._prev) 6887 6888 def _parse_set_item_assignment( 6889 self, kind: t.Optional[str] = None 6890 ) -> t.Optional[exp.Expression]: 6891 index = self._index 6892 6893 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6894 return self._parse_set_transaction(global_=kind == "GLOBAL") 6895 6896 left = self._parse_primary() 
or self._parse_column() 6897 assignment_delimiter = self._match_texts(("=", "TO")) 6898 6899 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6900 self._retreat(index) 6901 return None 6902 6903 right = self._parse_statement() or self._parse_id_var() 6904 if isinstance(right, (exp.Column, exp.Identifier)): 6905 right = exp.var(right.name) 6906 6907 this = self.expression(exp.EQ, this=left, expression=right) 6908 return self.expression(exp.SetItem, this=this, kind=kind) 6909 6910 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6911 self._match_text_seq("TRANSACTION") 6912 characteristics = self._parse_csv( 6913 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6914 ) 6915 return self.expression( 6916 exp.SetItem, 6917 expressions=characteristics, 6918 kind="TRANSACTION", 6919 **{"global": global_}, # type: ignore 6920 ) 6921 6922 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6923 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6924 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6925 6926 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6927 index = self._index 6928 set_ = self.expression( 6929 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6930 ) 6931 6932 if self._curr: 6933 self._retreat(index) 6934 return self._parse_as_command(self._prev) 6935 6936 return set_ 6937 6938 def _parse_var_from_options( 6939 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6940 ) -> t.Optional[exp.Var]: 6941 start = self._curr 6942 if not start: 6943 return None 6944 6945 option = start.text.upper() 6946 continuations = options.get(option) 6947 6948 index = self._index 6949 self._advance() 6950 for keywords in continuations or []: 6951 if isinstance(keywords, str): 6952 keywords = (keywords,) 6953 6954 if self._match_text_seq(*keywords): 6955 option = f"{option} {' '.join(keywords)}" 6956 break 6957 else: 6958 if continuations or continuations is None: 6959 if raise_unmatched: 6960 self.raise_error(f"Unknown option {option}") 6961 6962 self._retreat(index) 6963 return None 6964 6965 return exp.var(option) 6966 6967 def _parse_as_command(self, start: Token) -> exp.Command: 6968 while self._curr: 6969 self._advance() 6970 text = self._find_sql(start, self._prev) 6971 size = len(start.text) 6972 self._warn_unsupported() 6973 return exp.Command(this=text[:size], expression=text[size:]) 6974 6975 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6976 settings = [] 6977 6978 self._match_l_paren() 6979 kind = self._parse_id_var() 6980 6981 if self._match(TokenType.L_PAREN): 6982 while True: 6983 key = self._parse_id_var() 6984 value = self._parse_primary() 6985 6986 if not key and value is None: 6987 break 6988 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6989 self._match(TokenType.R_PAREN) 6990 6991 self._match_r_paren() 6992 6993 return self.expression( 6994 exp.DictProperty, 6995 this=this, 6996 kind=kind.this if kind else None, 6997 settings=settings, 6998 ) 6999 7000 def _parse_dict_range(self, this: str) -> exp.DictRange: 7001 self._match_l_paren() 7002 has_min = self._match_text_seq("MIN") 7003 if has_min: 7004 min = self._parse_var() or self._parse_primary() 7005 self._match_text_seq("MAX") 7006 max = self._parse_var() or self._parse_primary() 7007 else: 7008 max = self._parse_var() or self._parse_primary() 7009 min = exp.Literal.number(0) 7010 
self._match_r_paren() 7011 return self.expression(exp.DictRange, this=this, min=min, max=max) 7012 7013 def _parse_comprehension( 7014 self, this: t.Optional[exp.Expression] 7015 ) -> t.Optional[exp.Comprehension]: 7016 index = self._index 7017 expression = self._parse_column() 7018 if not self._match(TokenType.IN): 7019 self._retreat(index - 1) 7020 return None 7021 iterator = self._parse_column() 7022 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7023 return self.expression( 7024 exp.Comprehension, 7025 this=this, 7026 expression=expression, 7027 iterator=iterator, 7028 condition=condition, 7029 ) 7030 7031 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7032 if self._match(TokenType.HEREDOC_STRING): 7033 return self.expression(exp.Heredoc, this=self._prev.text) 7034 7035 if not self._match_text_seq("$"): 7036 return None 7037 7038 tags = ["$"] 7039 tag_text = None 7040 7041 if self._is_connected(): 7042 self._advance() 7043 tags.append(self._prev.text.upper()) 7044 else: 7045 self.raise_error("No closing $ found") 7046 7047 if tags[-1] != "$": 7048 if self._is_connected() and self._match_text_seq("$"): 7049 tag_text = tags[-1] 7050 tags.append("$") 7051 else: 7052 self.raise_error("No closing $ found") 7053 7054 heredoc_start = self._curr 7055 7056 while self._curr: 7057 if self._match_text_seq(*tags, advance=False): 7058 this = self._find_sql(heredoc_start, self._prev) 7059 self._advance(len(tags)) 7060 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7061 7062 self._advance() 7063 7064 self.raise_error(f"No closing {''.join(tags)} found") 7065 return None 7066 7067 def _find_parser( 7068 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7069 ) -> t.Optional[t.Callable]: 7070 if not self._curr: 7071 return None 7072 7073 index = self._index 7074 this = [] 7075 while True: 7076 # The current token might be multiple words 7077 curr = self._curr.text.upper() 7078 key = curr.split(" ") 7079 this.append(curr) 7080 7081 self._advance() 7082 result, trie = in_trie(trie, key) 7083 if result == TrieResult.FAILED: 7084 break 7085 7086 if result == TrieResult.EXISTS: 7087 subparser = parsers[" ".join(this)] 7088 return subparser 7089 7090 self._retreat(index) 7091 return None 7092 7093 def _match(self, token_type, advance=True, expression=None): 7094 if not self._curr: 7095 return None 7096 7097 if self._curr.token_type == token_type: 7098 if advance: 7099 self._advance() 7100 self._add_comments(expression) 7101 return True 7102 7103 return None 7104 7105 def _match_set(self, types, advance=True): 7106 if not self._curr: 7107 return None 7108 7109 if self._curr.token_type in types: 7110 if advance: 7111 self._advance() 7112 return True 7113 7114 return None 7115 7116 def _match_pair(self, token_type_a, token_type_b, advance=True): 7117 if not self._curr or not self._next: 7118 return None 7119 7120 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7121 if advance: 7122 self._advance(2) 7123 return True 7124 7125 return None 7126 7127 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7128 if not self._match(TokenType.L_PAREN, expression=expression): 7129 self.raise_error("Expecting (") 7130 7131 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7132 if not self._match(TokenType.R_PAREN, expression=expression): 7133 self.raise_error("Expecting )") 7134 7135 def _match_texts(self, texts, advance=True): 7136 if ( 7137 self._curr 7138 and 
self._curr.token_type != TokenType.STRING 7139 and self._curr.text.upper() in texts 7140 ): 7141 if advance: 7142 self._advance() 7143 return True 7144 return None 7145 7146 def _match_text_seq(self, *texts, advance=True): 7147 index = self._index 7148 for text in texts: 7149 if ( 7150 self._curr 7151 and self._curr.token_type != TokenType.STRING 7152 and self._curr.text.upper() == text 7153 ): 7154 self._advance() 7155 else: 7156 self._retreat(index) 7157 return None 7158 7159 if not advance: 7160 self._retreat(index) 7161 7162 return True 7163 7164 def _replace_lambda( 7165 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7166 ) -> t.Optional[exp.Expression]: 7167 if not node: 7168 return node 7169 7170 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7171 7172 for column in node.find_all(exp.Column): 7173 typ = lambda_types.get(column.parts[0].name) 7174 if typ is not None: 7175 dot_or_id = column.to_dot() if column.table else column.this 7176 7177 if typ: 7178 dot_or_id = self.expression( 7179 exp.Cast, 7180 this=dot_or_id, 7181 to=typ, 7182 ) 7183 7184 parent = column.parent 7185 7186 while isinstance(parent, exp.Dot): 7187 if not isinstance(parent.parent, exp.Dot): 7188 parent.replace(dot_or_id) 7189 break 7190 parent = parent.parent 7191 else: 7192 if column is node: 7193 node = dot_or_id 7194 else: 7195 column.replace(dot_or_id) 7196 return node 7197 7198 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7199 start = self._prev 7200 7201 # Not to be confused with TRUNCATE(number, decimals) function call 7202 if self._match(TokenType.L_PAREN): 7203 self._retreat(self._index - 2) 7204 return self._parse_function() 7205 7206 # Clickhouse supports TRUNCATE DATABASE as well 7207 is_database = self._match(TokenType.DATABASE) 7208 7209 self._match(TokenType.TABLE) 7210 7211 exists = self._parse_exists(not_=False) 7212 7213 expressions = self._parse_csv( 7214 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7215 ) 7216 7217 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7218 7219 if self._match_text_seq("RESTART", "IDENTITY"): 7220 identity = "RESTART" 7221 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7222 identity = "CONTINUE" 7223 else: 7224 identity = None 7225 7226 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7227 option = self._prev.text 7228 else: 7229 option = None 7230 7231 partition = self._parse_partition() 7232 7233 # Fallback case 7234 if self._curr: 7235 return self._parse_as_command(start) 7236 7237 return self.expression( 7238 exp.TruncateTable, 7239 expressions=expressions, 7240 is_database=is_database, 7241 exists=exists, 7242 cluster=cluster, 7243 identity=identity, 7244 option=option, 7245 partition=partition, 7246 ) 7247 7248 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7249 this = self._parse_ordered(self._parse_opclass) 7250 7251 if not self._match(TokenType.WITH): 7252 return this 7253 7254 op = self._parse_var(any_token=True) 7255 7256 return self.expression(exp.WithOperator, this=this, op=op) 7257 7258 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7259 self._match(TokenType.EQ) 7260 self._match(TokenType.L_PAREN) 7261 7262 opts: t.List[t.Optional[exp.Expression]] = [] 7263 while self._curr and not self._match(TokenType.R_PAREN): 7264 if self._match_text_seq("FORMAT_NAME", "="): 7265 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7266 # so 
we parse it separately to use _parse_field() 7267 prop = self.expression( 7268 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7269 ) 7270 opts.append(prop) 7271 else: 7272 opts.append(self._parse_property()) 7273 7274 self._match(TokenType.COMMA) 7275 7276 return opts 7277 7278 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7279 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7280 7281 options = [] 7282 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7283 option = self._parse_var(any_token=True) 7284 prev = self._prev.text.upper() 7285 7286 # Different dialects might separate options and values by white space, "=" and "AS" 7287 self._match(TokenType.EQ) 7288 self._match(TokenType.ALIAS) 7289 7290 param = self.expression(exp.CopyParameter, this=option) 7291 7292 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7293 TokenType.L_PAREN, advance=False 7294 ): 7295 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7296 param.set("expressions", self._parse_wrapped_options()) 7297 elif prev == "FILE_FORMAT": 7298 # T-SQL's external file format case 7299 param.set("expression", self._parse_field()) 7300 else: 7301 param.set("expression", self._parse_unquoted_field()) 7302 7303 options.append(param) 7304 self._match(sep) 7305 7306 return options 7307 7308 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7309 expr = self.expression(exp.Credentials) 7310 7311 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7312 expr.set("storage", self._parse_field()) 7313 if self._match_text_seq("CREDENTIALS"): 7314 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7315 creds = ( 7316 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7317 ) 7318 expr.set("credentials", creds) 7319 if self._match_text_seq("ENCRYPTION"): 7320 expr.set("encryption", self._parse_wrapped_options()) 7321 if self._match_text_seq("IAM_ROLE"): 7322 expr.set("iam_role", self._parse_field()) 7323 if self._match_text_seq("REGION"): 7324 expr.set("region", self._parse_field()) 7325 7326 return expr 7327 7328 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7329 return self._parse_field() 7330 7331 def _parse_copy(self) -> exp.Copy | exp.Command: 7332 start = self._prev 7333 7334 self._match(TokenType.INTO) 7335 7336 this = ( 7337 self._parse_select(nested=True, parse_subquery_alias=False) 7338 if self._match(TokenType.L_PAREN, advance=False) 7339 else self._parse_table(schema=True) 7340 ) 7341 7342 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7343 7344 files = self._parse_csv(self._parse_file_location) 7345 credentials = self._parse_credentials() 7346 7347 self._match_text_seq("WITH") 7348 7349 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7350 7351 # Fallback case 7352 if self._curr: 7353 return self._parse_as_command(start) 7354 7355 return self.expression( 7356 exp.Copy, 7357 this=this, 7358 kind=kind, 7359 credentials=credentials, 7360 files=files, 7361 params=params, 7362 ) 7363 7364 def _parse_normalize(self) -> exp.Normalize: 7365 return self.expression( 7366 exp.Normalize, 7367 this=self._parse_bitwise(), 7368 form=self._match(TokenType.COMMA) and self._parse_var(), 7369 )
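The _parse_show and _parse_set entry points in the source above resolve multi-word keywords by walking SHOW_TRIE and SET_TRIE through _find_parser, one token at a time. Below is a minimal sketch of that lookup using the same trie helpers the parser imports from sqlglot.trie; the keyword list is purely illustrative, not taken from any real SHOW_PARSERS table.

from sqlglot.trie import TrieResult, in_trie, new_trie

# Build a trie over multi-word keys, the same way the _Parser metaclass
# builds SHOW_TRIE and SET_TRIE from the parser tables.
keywords = ["GLOBAL STATUS", "GLOBAL VARIABLES", "TABLES"]  # illustrative only
trie = new_trie(key.split(" ") for key in keywords)

# Walk the trie one word at a time, mirroring _find_parser's token loop.
node = trie
for word in "GLOBAL STATUS".split(" "):
    result, node = in_trie(node, [word])
    if result == TrieResult.FAILED:
        break

assert result == TrieResult.EXISTS  # "GLOBAL STATUS" is a complete key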
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1354 def __init__( 1355 self, 1356 error_level: t.Optional[ErrorLevel] = None, 1357 error_message_context: int = 100, 1358 max_errors: int = 3, 1359 dialect: DialectType = None, 1360 ): 1361 from sqlglot.dialects import Dialect 1362 1363 self.error_level = error_level or ErrorLevel.IMMEDIATE 1364 self.error_message_context = error_message_context 1365 self.max_errors = max_errors 1366 self.dialect = Dialect.get_or_raise(dialect) 1367 self.reset()
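A minimal construction sketch: the constructor arguments documented above can be passed directly when driving the parser by hand. In everyday use, sqlglot.parse and sqlglot.parse_one wire the dialect's own Tokenizer and Parser together, so this is only needed for low-level control.

from sqlglot import Parser, Tokenizer
from sqlglot.errors import ErrorLevel

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)

# Collect up to 5 errors and raise them together, with 50 chars of context.
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
expressions = parser.parse(tokens, sql=sql)
print(expressions[0].sql())  # SELECT a FROM t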
1379 def parse( 1380 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1381 ) -> t.List[t.Optional[exp.Expression]]: 1382 """ 1383 Parses a list of tokens and returns a list of syntax trees, one tree 1384 per parsed SQL statement. 1385 1386 Args: 1387 raw_tokens: The list of tokens. 1388 sql: The original SQL string, used to produce helpful debug messages. 1389 1390 Returns: 1391 The list of the produced syntax trees. 1392 """ 1393 return self._parse( 1394 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1395 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
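As the docstring notes, parse produces one syntax tree per statement; a short sketch:

from sqlglot import Parser, Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
print(len(trees))                       # 2, one tree per SQL statement
print([tree.sql() for tree in trees])   # ['SELECT 1', 'SELECT 2']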
1397 def parse_into( 1398 self, 1399 expression_types: exp.IntoType, 1400 raw_tokens: t.List[Token], 1401 sql: t.Optional[str] = None, 1402 ) -> t.List[t.Optional[exp.Expression]]: 1403 """ 1404 Parses a list of tokens into a given Expression type. If a collection of Expression 1405 types is given instead, this method will try to parse the token list into each one 1406 of them, stopping at the first for which the parsing succeeds. 1407 1408 Args: 1409 expression_types: The expression type(s) to try and parse the token list into. 1410 raw_tokens: The list of tokens. 1411 sql: The original SQL string, used to produce helpful debug messages. 1412 1413 Returns: 1414 The target Expression. 1415 """ 1416 errors = [] 1417 for expression_type in ensure_list(expression_types): 1418 parser = self.EXPRESSION_PARSERS.get(expression_type) 1419 if not parser: 1420 raise TypeError(f"No parser registered for {expression_type}") 1421 1422 try: 1423 return self._parse(parser, raw_tokens, sql) 1424 except ParseError as e: 1425 e.errors[0]["into_expression"] = expression_type 1426 errors.append(e) 1427 1428 raise ParseError( 1429 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1430 errors=merge_errors(errors), 1431 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
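A usage sketch, assuming the target type is registered in the default EXPRESSION_PARSERS table (exp.Select is). When every attempted type fails, the individual failures are merged into a single ParseError, with each attempt recorded under the "into_expression" key.

from sqlglot import Parser, Tokenizer, exp
from sqlglot.errors import ParseError

sql = "SELECT a FROM t"
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(select, exp.Select)

try:
    # A token list that cannot be parsed as the requested type raises.
    Parser().parse_into(exp.From, Tokenizer().tokenize("SELECT 1"), sql="SELECT 1")
except ParseError as e:
    print(e.errors[0]["into_expression"])  # the type that was attempted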
1471 def check_errors(self) -> None: 1472 """Logs or raises any found errors, depending on the chosen error level setting.""" 1473 if self.error_level == ErrorLevel.WARN: 1474 for error in self.errors: 1475 logger.error(str(error)) 1476 elif self.error_level == ErrorLevel.RAISE and self.errors: 1477 raise ParseError( 1478 concat_messages(self.errors, self.max_errors), 1479 errors=merge_errors(self.errors), 1480 )
Logs or raises any found errors, depending on the chosen error level setting.
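A sketch of the WARN path: with ErrorLevel.WARN, errors are logged and accumulated on parser.errors rather than raised, and check_errors (run at the end of each parse) simply logs them.

from sqlglot import Parser, Tokenizer
from sqlglot.errors import ErrorLevel

sql = "SELECT 1 +"  # dangling operator, so validation records an error
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql=sql)
print(len(parser.errors))  # the recorded ParseError instances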
1482 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1483 """ 1484 Appends an error in the list of recorded errors or raises it, depending on the chosen 1485 error level setting. 1486 """ 1487 token = token or self._curr or self._prev or Token.string("") 1488 start = token.start 1489 end = token.end + 1 1490 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1491 highlight = self.sql[start:end] 1492 end_context = self.sql[end : end + self.error_message_context] 1493 1494 error = ParseError.new( 1495 f"{message}. Line {token.line}, Col: {token.col}.\n" 1496 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1497 description=message, 1498 line=token.line, 1499 col=token.col, 1500 start_context=start_context, 1501 highlight=highlight, 1502 end_context=end_context, 1503 ) 1504 1505 if self.error_level == ErrorLevel.IMMEDIATE: 1506 raise error 1507 1508 self.errors.append(error)
Appends an error to the list of recorded errors, or raises it immediately, depending on the chosen error level setting.
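The structured fields that raise_error captures (description, line, col, start_context, highlight, end_context) survive on the raised exception, which makes programmatic error reporting straightforward; a small sketch:

from sqlglot import Parser, Tokenizer
from sqlglot.errors import ParseError

sql = "SELECT 1 +"
try:
    # Default ErrorLevel.IMMEDIATE: the first error is raised right away.
    Parser().parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    first = e.errors[0]
    print(first["line"], first["col"], first["description"])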
1510 def expression( 1511 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1512 ) -> E: 1513 """ 1514 Creates a new, validated Expression. 1515 1516 Args: 1517 exp_class: The expression class to instantiate. 1518 comments: An optional list of comments to attach to the expression. 1519 kwargs: The arguments to set for the expression along with their respective values. 1520 1521 Returns: 1522 The target expression. 1523 """ 1524 instance = exp_class(**kwargs) 1525 instance.add_comments(comments) if comments else self._add_comments(instance) 1526 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
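A cautious sketch of building a node through expression() on a freshly constructed parser. This is primarily an internal helper used during parsing, so treat direct use outside of a parse run as illustrative only.

from sqlglot import Parser, exp

parser = Parser()

# Instantiates the node, attaches any pending comments (none here),
# and validates it before returning.
column = parser.expression(exp.Column, this=exp.to_identifier("a"))
print(column.sql())  # a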
1533 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1534 """ 1535 Validates an Expression, making sure that all its mandatory arguments are set. 1536 1537 Args: 1538 expression: The expression to validate. 1539 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1540 1541 Returns: 1542 The validated expression. 1543 """ 1544 if self.error_level != ErrorLevel.IGNORE: 1545 for error_message in expression.error_messages(args): 1546 self.raise_error(error_message) 1547 1548 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
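A sketch of validation behavior under different error levels, using exp.Not, whose "this" argument is mandatory:

from sqlglot import Parser, exp
from sqlglot.errors import ErrorLevel, ParseError

try:
    # Default ErrorLevel.IMMEDIATE: the missing mandatory arg raises right away.
    Parser().validate_expression(exp.Not())
except ParseError as e:
    print(e.errors[0]["description"])

# ErrorLevel.IGNORE skips validation and returns the (invalid) node unchanged.
node = Parser(error_level=ErrorLevel.IGNORE).validate_expression(exp.Not())
print(repr(node))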