sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120def build_array_constructor( 121 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 122) -> exp.Expression: 123 array_exp = exp_class(expressions=args) 124 125 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 126 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 127 128 return array_exp 129 130 131def build_convert_timezone( 132 args: t.List, default_source_tz: t.Optional[str] = None 133) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 134 if len(args) == 2: 135 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 136 return exp.ConvertTimezone( 137 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 138 ) 139 140 return exp.ConvertTimezone.from_arg_list(args) 141 142 143def build_trim(args: t.List, is_left: bool = True): 144 return exp.Trim( 145 this=seq_get(args, 0), 146 expression=seq_get(args, 1), 147 position="LEADING" if is_left else "TRAILING", 148 ) 149 150 151def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 152 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 153 154 155class _Parser(type): 156 def __new__(cls, clsname, bases, attrs): 157 klass = super().__new__(cls, clsname, bases, attrs) 158 159 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 160 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 161 162 return klass 163 164 165class Parser(metaclass=_Parser): 
166 """ 167 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 168 169 Args: 170 error_level: The desired error level. 171 Default: ErrorLevel.IMMEDIATE 172 error_message_context: The amount of context to capture from a query string when displaying 173 the error message (in number of characters). 174 Default: 100 175 max_errors: Maximum number of error messages to include in a raised ParseError. 176 This is only relevant if error_level is ErrorLevel.RAISE. 177 Default: 3 178 """ 179 180 FUNCTIONS: t.Dict[str, t.Callable] = { 181 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 182 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 183 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 184 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 185 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 186 ), 187 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 188 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 189 ), 190 "CHAR": lambda args: exp.Chr(expressions=args), 191 "CHR": lambda args: exp.Chr(expressions=args), 192 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 193 "CONCAT": lambda args, dialect: exp.Concat( 194 expressions=args, 195 safe=not dialect.STRICT_STRING_CONCAT, 196 coalesce=dialect.CONCAT_COALESCE, 197 ), 198 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 199 expressions=args, 200 safe=not dialect.STRICT_STRING_CONCAT, 201 coalesce=dialect.CONCAT_COALESCE, 202 ), 203 "CONVERT_TIMEZONE": build_convert_timezone, 204 "DATE_TO_DATE_STR": lambda args: exp.Cast( 205 this=seq_get(args, 0), 206 to=exp.DataType(this=exp.DataType.Type.TEXT), 207 ), 208 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 209 start=seq_get(args, 0), 210 end=seq_get(args, 1), 211 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 212 
), 213 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 214 "HEX": build_hex, 215 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 216 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 217 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 218 "LIKE": build_like, 219 "LOG": build_logarithm, 220 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 221 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 222 "LOWER": build_lower, 223 "LPAD": lambda args: build_pad(args), 224 "LEFTPAD": lambda args: build_pad(args), 225 "LTRIM": lambda args: build_trim(args), 226 "MOD": build_mod, 227 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 228 "RPAD": lambda args: build_pad(args, is_left=False), 229 "RTRIM": lambda args: build_trim(args, is_left=False), 230 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 231 if len(args) != 2 232 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 233 "TIME_TO_TIME_STR": lambda args: exp.Cast( 234 this=seq_get(args, 0), 235 to=exp.DataType(this=exp.DataType.Type.TEXT), 236 ), 237 "TO_HEX": build_hex, 238 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 239 this=exp.Cast( 240 this=seq_get(args, 0), 241 to=exp.DataType(this=exp.DataType.Type.TEXT), 242 ), 243 start=exp.Literal.number(1), 244 length=exp.Literal.number(10), 245 ), 246 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 247 "UPPER": build_upper, 248 "VAR_MAP": build_var_map, 249 } 250 251 NO_PAREN_FUNCTIONS = { 252 TokenType.CURRENT_DATE: exp.CurrentDate, 253 TokenType.CURRENT_DATETIME: exp.CurrentDate, 254 TokenType.CURRENT_TIME: exp.CurrentTime, 255 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 256 TokenType.CURRENT_USER: exp.CurrentUser, 257 } 258 259 STRUCT_TYPE_TOKENS = { 260 TokenType.NESTED, 261 
TokenType.OBJECT, 262 TokenType.STRUCT, 263 TokenType.UNION, 264 } 265 266 NESTED_TYPE_TOKENS = { 267 TokenType.ARRAY, 268 TokenType.LIST, 269 TokenType.LOWCARDINALITY, 270 TokenType.MAP, 271 TokenType.NULLABLE, 272 TokenType.RANGE, 273 *STRUCT_TYPE_TOKENS, 274 } 275 276 ENUM_TYPE_TOKENS = { 277 TokenType.ENUM, 278 TokenType.ENUM8, 279 TokenType.ENUM16, 280 } 281 282 AGGREGATE_TYPE_TOKENS = { 283 TokenType.AGGREGATEFUNCTION, 284 TokenType.SIMPLEAGGREGATEFUNCTION, 285 } 286 287 TYPE_TOKENS = { 288 TokenType.BIT, 289 TokenType.BOOLEAN, 290 TokenType.TINYINT, 291 TokenType.UTINYINT, 292 TokenType.SMALLINT, 293 TokenType.USMALLINT, 294 TokenType.INT, 295 TokenType.UINT, 296 TokenType.BIGINT, 297 TokenType.UBIGINT, 298 TokenType.INT128, 299 TokenType.UINT128, 300 TokenType.INT256, 301 TokenType.UINT256, 302 TokenType.MEDIUMINT, 303 TokenType.UMEDIUMINT, 304 TokenType.FIXEDSTRING, 305 TokenType.FLOAT, 306 TokenType.DOUBLE, 307 TokenType.CHAR, 308 TokenType.NCHAR, 309 TokenType.VARCHAR, 310 TokenType.NVARCHAR, 311 TokenType.BPCHAR, 312 TokenType.TEXT, 313 TokenType.MEDIUMTEXT, 314 TokenType.LONGTEXT, 315 TokenType.MEDIUMBLOB, 316 TokenType.LONGBLOB, 317 TokenType.BINARY, 318 TokenType.VARBINARY, 319 TokenType.JSON, 320 TokenType.JSONB, 321 TokenType.INTERVAL, 322 TokenType.TINYBLOB, 323 TokenType.TINYTEXT, 324 TokenType.TIME, 325 TokenType.TIMETZ, 326 TokenType.TIMESTAMP, 327 TokenType.TIMESTAMP_S, 328 TokenType.TIMESTAMP_MS, 329 TokenType.TIMESTAMP_NS, 330 TokenType.TIMESTAMPTZ, 331 TokenType.TIMESTAMPLTZ, 332 TokenType.TIMESTAMPNTZ, 333 TokenType.DATETIME, 334 TokenType.DATETIME64, 335 TokenType.DATE, 336 TokenType.DATE32, 337 TokenType.INT4RANGE, 338 TokenType.INT4MULTIRANGE, 339 TokenType.INT8RANGE, 340 TokenType.INT8MULTIRANGE, 341 TokenType.NUMRANGE, 342 TokenType.NUMMULTIRANGE, 343 TokenType.TSRANGE, 344 TokenType.TSMULTIRANGE, 345 TokenType.TSTZRANGE, 346 TokenType.TSTZMULTIRANGE, 347 TokenType.DATERANGE, 348 TokenType.DATEMULTIRANGE, 349 TokenType.DECIMAL, 350 
TokenType.DECIMAL32, 351 TokenType.DECIMAL64, 352 TokenType.DECIMAL128, 353 TokenType.UDECIMAL, 354 TokenType.BIGDECIMAL, 355 TokenType.UUID, 356 TokenType.GEOGRAPHY, 357 TokenType.GEOMETRY, 358 TokenType.HLLSKETCH, 359 TokenType.HSTORE, 360 TokenType.PSEUDO_TYPE, 361 TokenType.SUPER, 362 TokenType.SERIAL, 363 TokenType.SMALLSERIAL, 364 TokenType.BIGSERIAL, 365 TokenType.XML, 366 TokenType.YEAR, 367 TokenType.UNIQUEIDENTIFIER, 368 TokenType.USERDEFINED, 369 TokenType.MONEY, 370 TokenType.SMALLMONEY, 371 TokenType.ROWVERSION, 372 TokenType.IMAGE, 373 TokenType.VARIANT, 374 TokenType.VECTOR, 375 TokenType.OBJECT, 376 TokenType.OBJECT_IDENTIFIER, 377 TokenType.INET, 378 TokenType.IPADDRESS, 379 TokenType.IPPREFIX, 380 TokenType.IPV4, 381 TokenType.IPV6, 382 TokenType.UNKNOWN, 383 TokenType.NULL, 384 TokenType.NAME, 385 TokenType.TDIGEST, 386 *ENUM_TYPE_TOKENS, 387 *NESTED_TYPE_TOKENS, 388 *AGGREGATE_TYPE_TOKENS, 389 } 390 391 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 392 TokenType.BIGINT: TokenType.UBIGINT, 393 TokenType.INT: TokenType.UINT, 394 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 395 TokenType.SMALLINT: TokenType.USMALLINT, 396 TokenType.TINYINT: TokenType.UTINYINT, 397 TokenType.DECIMAL: TokenType.UDECIMAL, 398 } 399 400 SUBQUERY_PREDICATES = { 401 TokenType.ANY: exp.Any, 402 TokenType.ALL: exp.All, 403 TokenType.EXISTS: exp.Exists, 404 TokenType.SOME: exp.Any, 405 } 406 407 RESERVED_TOKENS = { 408 *Tokenizer.SINGLE_TOKENS.values(), 409 TokenType.SELECT, 410 } - {TokenType.IDENTIFIER} 411 412 DB_CREATABLES = { 413 TokenType.DATABASE, 414 TokenType.DICTIONARY, 415 TokenType.MODEL, 416 TokenType.SCHEMA, 417 TokenType.SEQUENCE, 418 TokenType.STORAGE_INTEGRATION, 419 TokenType.TABLE, 420 TokenType.TAG, 421 TokenType.VIEW, 422 TokenType.WAREHOUSE, 423 TokenType.STREAMLIT, 424 } 425 426 CREATABLES = { 427 TokenType.COLUMN, 428 TokenType.CONSTRAINT, 429 TokenType.FOREIGN_KEY, 430 TokenType.FUNCTION, 431 TokenType.INDEX, 432 TokenType.PROCEDURE, 433 *DB_CREATABLES, 434 } 435 
    # Object kinds that can follow ALTER.
    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    # UNION comes in via STRUCT_TYPE_TOKENS -> NESTED_TYPE_TOKENS -> TYPE_TOKENS,
    # but it must not be usable as an identifier, so it is removed again here.
    ID_VAR_TOKENS.remove(TokenType.UNION)
    # Tokens allowed as an INTERVAL unit/variable.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Tokens allowed as a table alias; excludes tokens that would be ambiguous
    # with a following join/lock/window clause.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    # Constructor keyword -> array-like expression class.
    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Valid TRIM position specifiers.
    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may appear as a function name before a parenthesized arg list.
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # The following token -> expression-class tables drive binary operator parsing,
    # one table per precedence level.
    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # No exponentiation operator by default; dialects override this.
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    # No join hints by default; dialects override this.
    JOIN_HINTS: t.Set[str] = set()

    # Lambda arrow token -> builder for the lambda (->) / kwarg (=>) expression.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    # Postfix column operator token -> builder; DOT is handled specially (None).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression class (or string key) -> parser entry point for Parser.parse_into.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Leading statement token -> parser for that statement kind.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    # Prefix operator token -> parser for the unary expression.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # String literal token -> builder for the matching literal expression.
    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Numeric-ish literal token -> builder for the matching literal expression.
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # All primary (leaf) expression parsers: literals plus a few special tokens.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    # Placeholder token -> parser for bind parameters / placeholders.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Range/predicate operator token -> parser (BETWEEN, IN, IS, LIKE-family, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Property keyword -> parser, used when parsing CREATE/ALTER property lists.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Constraint keyword -> parser, used when parsing column/table constraints.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    # ALTER TABLE ... ALTER sub-action keyword -> parser.
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint keywords that may appear unnamed in a schema definition.
    # NOTE(review): this set literal continues beyond the visible chunk.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
1075 "PRIMARY KEY", 1076 "UNIQUE", 1077 } 1078 1079 NO_PAREN_FUNCTION_PARSERS = { 1080 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1081 "CASE": lambda self: self._parse_case(), 1082 "CONNECT_BY_ROOT": lambda self: self.expression( 1083 exp.ConnectByRoot, this=self._parse_column() 1084 ), 1085 "IF": lambda self: self._parse_if(), 1086 "NEXT": lambda self: self._parse_next_value_for(), 1087 } 1088 1089 INVALID_FUNC_NAME_TOKENS = { 1090 TokenType.IDENTIFIER, 1091 TokenType.STRING, 1092 } 1093 1094 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1095 1096 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1097 1098 FUNCTION_PARSERS = { 1099 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1100 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1101 "DECODE": lambda self: self._parse_decode(), 1102 "EXTRACT": lambda self: self._parse_extract(), 1103 "GAP_FILL": lambda self: self._parse_gap_fill(), 1104 "JSON_OBJECT": lambda self: self._parse_json_object(), 1105 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1106 "JSON_TABLE": lambda self: self._parse_json_table(), 1107 "MATCH": lambda self: self._parse_match_against(), 1108 "NORMALIZE": lambda self: self._parse_normalize(), 1109 "OPENJSON": lambda self: self._parse_open_json(), 1110 "POSITION": lambda self: self._parse_position(), 1111 "PREDICT": lambda self: self._parse_predict(), 1112 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1113 "STRING_AGG": lambda self: self._parse_string_agg(), 1114 "SUBSTRING": lambda self: self._parse_substring(), 1115 "TRIM": lambda self: self._parse_trim(), 1116 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1117 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1118 } 1119 1120 QUERY_MODIFIER_PARSERS = { 1121 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1122 TokenType.PREWHERE: lambda self: ("prewhere", 
self._parse_prewhere()), 1123 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1124 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1125 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1126 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1127 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1128 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1129 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1130 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1131 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1132 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1133 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1134 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1135 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1136 TokenType.CLUSTER_BY: lambda self: ( 1137 "cluster", 1138 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1139 ), 1140 TokenType.DISTRIBUTE_BY: lambda self: ( 1141 "distribute", 1142 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1143 ), 1144 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1145 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1146 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1147 } 1148 1149 SET_PARSERS = { 1150 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1151 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1152 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1153 "TRANSACTION": lambda self: self._parse_set_transaction(), 1154 } 1155 1156 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1157 1158 TYPE_LITERAL_PARSERS = { 1159 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, 
this=this), 1160 } 1161 1162 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1163 1164 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1165 1166 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1167 1168 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1169 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1170 "ISOLATION": ( 1171 ("LEVEL", "REPEATABLE", "READ"), 1172 ("LEVEL", "READ", "COMMITTED"), 1173 ("LEVEL", "READ", "UNCOMITTED"), 1174 ("LEVEL", "SERIALIZABLE"), 1175 ), 1176 "READ": ("WRITE", "ONLY"), 1177 } 1178 1179 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1180 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1181 ) 1182 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1183 1184 CREATE_SEQUENCE: OPTIONS_TYPE = { 1185 "SCALE": ("EXTEND", "NOEXTEND"), 1186 "SHARD": ("EXTEND", "NOEXTEND"), 1187 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1188 **dict.fromkeys( 1189 ( 1190 "SESSION", 1191 "GLOBAL", 1192 "KEEP", 1193 "NOKEEP", 1194 "ORDER", 1195 "NOORDER", 1196 "NOCACHE", 1197 "CYCLE", 1198 "NOCYCLE", 1199 "NOMINVALUE", 1200 "NOMAXVALUE", 1201 "NOSCALE", 1202 "NOSHARD", 1203 ), 1204 tuple(), 1205 ), 1206 } 1207 1208 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1209 1210 USABLES: OPTIONS_TYPE = dict.fromkeys( 1211 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1212 ) 1213 1214 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1215 1216 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1217 "TYPE": ("EVOLUTION",), 1218 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1219 } 1220 1221 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1222 "NOT": ("ENFORCED",), 1223 "MATCH": ( 1224 "FULL", 1225 "PARTIAL", 1226 "SIMPLE", 1227 ), 1228 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1229 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1230 } 1231 1232 
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that may follow ALTER TABLE ... ADD when adding a constraint.
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC escape-literal prefixes mapped to the expression they produce,
    # e.g. {d '2024-01-01'} -> exp.Date.
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # Whether CAST raises on failure in this dialect (vs returning NULL like TRY_CAST)
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG defaults to the natural logarithm
    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Local import to avoid a circular dependency between parser and dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the tokens can't be parsed into any of the requested types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into semicolon-delimited chunks, then applies
        # parse_method to each chunk to produce one expression per statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon carrying comments becomes its own chunk so the
                # comments aren't lost with the statement separator.
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The escape sequences underline the offending SQL in terminals
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, attach (and consume) any pending
        # comments from the previously advanced-over token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers buffered token comments onto the expression, clearing the buffer
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens (inclusive)
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True if the previous and current tokens are adjacent in the input (no whitespace)
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward, refreshing the _curr/_next/_prev token views
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to an earlier index (used for backtracking)
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback: wraps the rest of the statement in an opaque Command node
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT ON <kind> <target> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Parses one TTL entry: an expression optionally followed by an action
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the leading token,
        # fall back to commands, then to a bare expression/select.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only on a full match
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
or self._match_pair(TokenType.OR, TokenType.REPLACE) 1757 or self._match_pair(TokenType.OR, TokenType.ALTER) 1758 ) 1759 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1760 1761 unique = self._match(TokenType.UNIQUE) 1762 1763 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1764 clustered = True 1765 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1766 "COLUMNSTORE" 1767 ): 1768 clustered = False 1769 else: 1770 clustered = None 1771 1772 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1773 self._advance() 1774 1775 properties = None 1776 create_token = self._match_set(self.CREATABLES) and self._prev 1777 1778 if not create_token: 1779 # exp.Properties.Location.POST_CREATE 1780 properties = self._parse_properties() 1781 create_token = self._match_set(self.CREATABLES) and self._prev 1782 1783 if not properties or not create_token: 1784 return self._parse_as_command(start) 1785 1786 concurrently = self._match_text_seq("CONCURRENTLY") 1787 exists = self._parse_exists(not_=True) 1788 this = None 1789 expression: t.Optional[exp.Expression] = None 1790 indexes = None 1791 no_schema_binding = None 1792 begin = None 1793 end = None 1794 clone = None 1795 1796 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1797 nonlocal properties 1798 if properties and temp_props: 1799 properties.expressions.extend(temp_props.expressions) 1800 elif temp_props: 1801 properties = temp_props 1802 1803 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1804 this = self._parse_user_defined_function(kind=create_token.token_type) 1805 1806 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1807 extend_props(self._parse_properties()) 1808 1809 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1810 extend_props(self._parse_properties()) 1811 1812 if not expression: 1813 if self._match(TokenType.COMMAND): 1814 expression = 
self._parse_as_command(self._prev) 1815 else: 1816 begin = self._match(TokenType.BEGIN) 1817 return_ = self._match_text_seq("RETURN") 1818 1819 if self._match(TokenType.STRING, advance=False): 1820 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1821 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1822 expression = self._parse_string() 1823 extend_props(self._parse_properties()) 1824 else: 1825 expression = self._parse_statement() 1826 1827 end = self._match_text_seq("END") 1828 1829 if return_: 1830 expression = self.expression(exp.Return, this=expression) 1831 elif create_token.token_type == TokenType.INDEX: 1832 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1833 if not self._match(TokenType.ON): 1834 index = self._parse_id_var() 1835 anonymous = False 1836 else: 1837 index = None 1838 anonymous = True 1839 1840 this = self._parse_index(index=index, anonymous=anonymous) 1841 elif create_token.token_type in self.DB_CREATABLES: 1842 table_parts = self._parse_table_parts( 1843 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1844 ) 1845 1846 # exp.Properties.Location.POST_NAME 1847 self._match(TokenType.COMMA) 1848 extend_props(self._parse_properties(before=True)) 1849 1850 this = self._parse_schema(this=table_parts) 1851 1852 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1853 extend_props(self._parse_properties()) 1854 1855 self._match(TokenType.ALIAS) 1856 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1857 # exp.Properties.Location.POST_ALIAS 1858 extend_props(self._parse_properties()) 1859 1860 if create_token.token_type == TokenType.SEQUENCE: 1861 expression = self._parse_types() 1862 extend_props(self._parse_properties()) 1863 else: 1864 expression = self._parse_ddl_select() 1865 1866 if create_token.token_type == TokenType.TABLE: 1867 # exp.Properties.Location.POST_EXPRESSION 
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # If there is trailing text that cannot be consumed, fall back to a raw command.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options (INCREMENT BY, MINVALUE, START, CACHE, ...).

        Returns None if no tokens were consumed, so callers can treat the
        absence of sequence options as "no property".
        """
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that appears before the object name in DDL.

        Collects leading modifier keywords (NO, DUAL, BEFORE, ...) and forwards
        the truthy ones to the matched property parser as keyword arguments.
        """
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser does not accept the collected modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property, dispatching through PROPERTY_PARSERS.

        Falls back to a generic `key = value` property, and finally to
        sequence options, when no dedicated parser matches.
        """
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED [AS] <format>, including Hive INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, demoting an unquoted identifier to a plain variable."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `[= | AS] <value>` and wrap it in the given expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into an exp.Properties node, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        """Parse SECURITY {DEFINER | INVOKER}."""
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        """Parse SETTINGS as a comma-separated list of assignments."""
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property when preceded by a pre-volatile
        token, otherwise a routine stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse SYSTEM_VERSIONING = {ON [(...)] | OFF} (T-SQL temporal tables)."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Optional parenthesized sub-options, in any order.
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = {ON | OFF} [( FILTER_COLUMN = ..., RETENTION_PERIOD = ... )]."""
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY {HASH (cols) | RANDOM} [BUCKETS {n | AUTO}] [ORDER BY ...]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        """Parse DUPLICATE KEY (col, ...)."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the many WITH-prefixed property forms, trying each alternative in turn."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        # DEFINER = user@host; host may also be the bare "%" wildcard token.
        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Parse a JOURNAL property; modifiers arrive as keyword arguments."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM [=] {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse CLUSTER BY expressions, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; backtracks over COPY if GRANTS does not follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either as `= <number> [PERCENT]` or with NO/DEFAULT modifiers."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [=] <size> [BYTES | KBYTES | KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [<target>]; backtracks if absent."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the others name the locked object.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY expressions; returns [] when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not ordinary expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>}."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or expression>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse CONTAINS SQL."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse MODIFIES SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse NO-prefixed properties: NO PRIMARY INDEX, NO SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON-prefixed properties: ON COMMIT {PRESERVE | DELETE} ROWS, or ON <schema>."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse READS SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option>]*."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (cols)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <connection>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a routine RETURNS clause: TABLE<...>, TABLE (...), NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [kind] [style] <table> [properties] [partition]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" word was actually the first part of a dotted table name.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse multi-table INSERT {FIRST | ALL} [WHEN ... THEN] INTO ... <source>."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One WHEN/ELSE branch; returns None once no more INTO clauses follow.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement, including multi-table and DIRECTORY variants."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... (Postgres) or ON DUPLICATE KEY ... (MySQL)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... given that ROW was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...); backtracks if the keyword is absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive ROW FORMAT {SERDE '<class>' | DELIMITED [clauses...]}."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; other LOAD forms become a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET <assignments> [FROM] [WHERE] [RETURNING] [ORDER BY] [LIMIT]."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
self._match(TokenType.TABLE) 2858 table = self._parse_table(schema=True) 2859 2860 options = [] 2861 if self._match_text_seq("OPTIONS"): 2862 self._match_l_paren() 2863 k = self._parse_string() 2864 self._match(TokenType.EQ) 2865 v = self._parse_string() 2866 options = [k, v] 2867 self._match_r_paren() 2868 2869 self._match(TokenType.ALIAS) 2870 return self.expression( 2871 exp.Cache, 2872 this=table, 2873 lazy=lazy, 2874 options=options, 2875 expression=self._parse_select(nested=True), 2876 ) 2877 2878 def _parse_partition(self) -> t.Optional[exp.Partition]: 2879 if not self._match(TokenType.PARTITION): 2880 return None 2881 2882 return self.expression( 2883 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2884 ) 2885 2886 def _parse_value(self) -> t.Optional[exp.Tuple]: 2887 if self._match(TokenType.L_PAREN): 2888 expressions = self._parse_csv(self._parse_expression) 2889 self._match_r_paren() 2890 return self.expression(exp.Tuple, expressions=expressions) 2891 2892 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list; dialects may override."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query (SELECT, parenthesized query, VALUES, leading
        FROM, SUMMARIZE, DESCRIBE, STREAM), or return None if nothing matches."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means ALL/DISTINCT here would be a column reference
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
3021 3022 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3023 if not skip_with_token and not self._match(TokenType.WITH): 3024 return None 3025 3026 comments = self._prev_comments 3027 recursive = self._match(TokenType.RECURSIVE) 3028 3029 expressions = [] 3030 while True: 3031 expressions.append(self._parse_cte()) 3032 3033 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3034 break 3035 else: 3036 self._match(TokenType.WITH) 3037 3038 return self.expression( 3039 exp.With, comments=comments, expressions=expressions, recursive=recursive 3040 ) 3041 3042 def _parse_cte(self) -> exp.CTE: 3043 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3044 if not alias or not alias.this: 3045 self.raise_error("Expected CTE to have alias") 3046 3047 self._match(TokenType.ALIAS) 3048 comments = self._prev_comments 3049 3050 if self._match_text_seq("NOT", "MATERIALIZED"): 3051 materialized = False 3052 elif self._match_text_seq("MATERIALIZED"): 3053 materialized = True 3054 else: 3055 materialized = None 3056 3057 return self.expression( 3058 exp.CTE, 3059 this=self._parse_wrapped(self._parse_statement), 3060 alias=alias, 3061 materialized=materialized, 3062 comments=comments, 3063 ) 3064 3065 def _parse_table_alias( 3066 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3067 ) -> t.Optional[exp.TableAlias]: 3068 any_token = self._match(TokenType.ALIAS) 3069 alias = ( 3070 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3071 or self._parse_string_as_identifier() 3072 ) 3073 3074 index = self._index 3075 if self._match(TokenType.L_PAREN): 3076 columns = self._parse_csv(self._parse_function_parameter) 3077 self._match_r_paren() if columns else self._retreat(index) 3078 else: 3079 columns = None 3080 3081 if not alias and not columns: 3082 return None 3083 3084 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3085 3086 # We bubble up comments from 
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an exp.Subquery, parsing trailing pivots/alias/sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to previously seen names as explicit
        UNNESTs (e.g. `FROM t, t.arr` becomes `FROM t, UNNEST(t.arr)`)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Names that subsequent join operands may implicitly refer to
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE, GROUP BY, LIMIT, ...)
        to `this`, using the QUERY_MODIFIER_PARSERS dispatch table."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # Move LIMIT ... BY expressions over to the Offset node
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block (terminated by */), or None."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] target, or None."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause into an exp.From node, or None if absent."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry: [FINAL|RUNNING] expression."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause, or None if absent."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan tokens to find the matching closing paren of PATTERN(...)
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            # The pattern is preserved as raw SQL text rather than parsed further
            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY, or None if absent."""
        # cross_apply: True = CROSS APPLY, False = OUTER APPLY, None = LATERAL
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a function call or (dotted) identifier follows
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING (...) clause."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                # Unwrap a bare column to its underlying identifier
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join operand (including comma joins), or None if absent."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type == TokenType.CROSS)
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            # If nested joins were parsed but no ON/USING follows, they weren't
            # really nested under this join — backtrack and discard them.
            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter clauses of a CREATE INDEX statement."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse a CREATE INDEX body into an exp.Index, or None.

        When `index` is given or `anonymous` is set, the index name has already
        been consumed (or there is none) and only the target table follows.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL index table hints, or None."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly qualified name (catalog.db.table) into an exp.Table."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like operand: lateral, unnest, VALUES, subquery,
        ROWS FROM, or a (possibly qualified) table name with its trailing clauses."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # Dialects differ on whether the sample comes before or after the alias
        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause into exp.Version, or None."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse a Snowflake AT/BEFORE time-travel clause, or None."""
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a complete AT/BEFORE clause — backtrack
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a CHANGES (INFORMATION => ...) clause, or None."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, or None if absent."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
self.raise_error("Unexpected extra column alias in unnest.") 3784 3785 alias.set("columns", [alias.this]) 3786 alias.set("this", None) 3787 3788 columns = alias.args.get("columns") or [] 3789 if offset and len(expressions) < len(columns): 3790 offset = columns.pop() 3791 3792 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3793 self._match(TokenType.ALIAS) 3794 offset = self._parse_id_var( 3795 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3796 ) or exp.to_identifier("offset") 3797 3798 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3799 3800 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3801 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3802 if not is_derived and not ( 3803 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3804 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3805 ): 3806 return None 3807 3808 expressions = self._parse_csv(self._parse_value) 3809 alias = self._parse_table_alias() 3810 3811 if is_derived: 3812 self._match_r_paren() 3813 3814 return self.expression( 3815 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3816 ) 3817 3818 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3819 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3820 as_modifier and self._match_text_seq("USING", "SAMPLE") 3821 ): 3822 return None 3823 3824 bucket_numerator = None 3825 bucket_denominator = None 3826 bucket_field = None 3827 percent = None 3828 size = None 3829 seed = None 3830 3831 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3832 matched_l_paren = self._match(TokenType.L_PAREN) 3833 3834 if self.TABLESAMPLE_CSV: 3835 num = None 3836 expressions = self._parse_csv(self._parse_primary) 3837 else: 3838 expressions = None 3839 num = ( 3840 self._parse_factor() 3841 if self._match(TokenType.NUMBER, advance=False) 3842 else 
self._parse_primary() or self._parse_placeholder() 3843 ) 3844 3845 if self._match_text_seq("BUCKET"): 3846 bucket_numerator = self._parse_number() 3847 self._match_text_seq("OUT", "OF") 3848 bucket_denominator = bucket_denominator = self._parse_number() 3849 self._match(TokenType.ON) 3850 bucket_field = self._parse_field() 3851 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3852 percent = num 3853 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3854 size = num 3855 else: 3856 percent = num 3857 3858 if matched_l_paren: 3859 self._match_r_paren() 3860 3861 if self._match(TokenType.L_PAREN): 3862 method = self._parse_var(upper=True) 3863 seed = self._match(TokenType.COMMA) and self._parse_number() 3864 self._match_r_paren() 3865 elif self._match_texts(("SEED", "REPEATABLE")): 3866 seed = self._parse_wrapped(self._parse_number) 3867 3868 if not method and self.DEFAULT_SAMPLING_METHOD: 3869 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3870 3871 return self.expression( 3872 exp.TableSample, 3873 expressions=expressions, 3874 method=method, 3875 bucket_numerator=bucket_numerator, 3876 bucket_denominator=bucket_denominator, 3877 bucket_field=bucket_field, 3878 percent=percent, 3879 size=size, 3880 seed=seed, 3881 ) 3882 3883 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3884 return list(iter(self._parse_pivot, None)) or None 3885 3886 def _parse_joins(self) -> t.Iterator[exp.Join]: 3887 return iter(self._parse_join, None) 3888 3889 # https://duckdb.org/docs/sql/statements/pivot 3890 def _parse_simplified_pivot(self) -> exp.Pivot: 3891 def _parse_on() -> t.Optional[exp.Expression]: 3892 this = self._parse_bitwise() 3893 return self._parse_in(this) if self._match(TokenType.IN) else this 3894 3895 this = self._parse_table() 3896 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3897 using = self._match(TokenType.USING) and self._parse_csv( 3898 lambda: self._parse_alias(self._parse_function()) 
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the `value IN (...)` portion of a PIVOT's FOR clause."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    # Unwrap a bare column alias to its identifier
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a full PIVOT/UNPIVOT clause, or None if absent."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
self.raise_error("Expecting FOR") 3964 3965 field = self._parse_pivot_in() 3966 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3967 self._parse_bitwise 3968 ) 3969 3970 self._match_r_paren() 3971 3972 pivot = self.expression( 3973 exp.Pivot, 3974 expressions=expressions, 3975 field=field, 3976 unpivot=unpivot, 3977 include_nulls=include_nulls, 3978 default_on_null=default_on_null, 3979 ) 3980 3981 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3982 pivot.set("alias", self._parse_table_alias()) 3983 3984 if not unpivot: 3985 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3986 3987 columns: t.List[exp.Expression] = [] 3988 for fld in pivot.args["field"].expressions: 3989 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3990 for name in names: 3991 if self.PREFIXED_PIVOT_COLUMNS: 3992 name = f"{name}_{field_name}" if name else field_name 3993 else: 3994 name = f"{field_name}_{name}" if name else field_name 3995 3996 columns.append(exp.to_identifier(name)) 3997 3998 pivot.set("columns", columns) 3999 4000 return pivot 4001 4002 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4003 return [agg.alias for agg in aggregations] 4004 4005 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4006 if not skip_where_token and not self._match(TokenType.PREWHERE): 4007 return None 4008 4009 return self.expression( 4010 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4011 ) 4012 4013 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4014 if not skip_where_token and not self._match(TokenType.WHERE): 4015 return None 4016 4017 return self.expression( 4018 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4019 ) 4020 4021 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4022 
if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4023 return None 4024 4025 elements: t.Dict[str, t.Any] = defaultdict(list) 4026 4027 if self._match(TokenType.ALL): 4028 elements["all"] = True 4029 elif self._match(TokenType.DISTINCT): 4030 elements["all"] = False 4031 4032 while True: 4033 index = self._index 4034 4035 elements["expressions"].extend( 4036 self._parse_csv( 4037 lambda: None 4038 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4039 else self._parse_assignment() 4040 ) 4041 ) 4042 4043 before_with_index = self._index 4044 with_prefix = self._match(TokenType.WITH) 4045 4046 if self._match(TokenType.ROLLUP): 4047 elements["rollup"].append( 4048 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4049 ) 4050 elif self._match(TokenType.CUBE): 4051 elements["cube"].append( 4052 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4053 ) 4054 elif self._match(TokenType.GROUPING_SETS): 4055 elements["grouping_sets"].append( 4056 self.expression( 4057 exp.GroupingSets, 4058 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4059 ) 4060 ) 4061 elif self._match_text_seq("TOTALS"): 4062 elements["totals"] = True # type: ignore 4063 4064 if before_with_index <= self._index <= before_with_index + 1: 4065 self._retreat(before_with_index) 4066 break 4067 4068 if index == self._index: 4069 break 4070 4071 return self.expression(exp.Group, **elements) # type: ignore 4072 4073 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4074 return self.expression( 4075 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4076 ) 4077 4078 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4079 if self._match(TokenType.L_PAREN): 4080 grouping_set = self._parse_csv(self._parse_column) 4081 self._match_r_paren() 4082 return self.expression(exp.Tuple, expressions=grouping_set) 4083 4084 return self._parse_column() 4085 4086 def 
_parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4087 if not skip_having_token and not self._match(TokenType.HAVING): 4088 return None 4089 return self.expression(exp.Having, this=self._parse_assignment()) 4090 4091 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4092 if not self._match(TokenType.QUALIFY): 4093 return None 4094 return self.expression(exp.Qualify, this=self._parse_assignment()) 4095 4096 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4097 if skip_start_token: 4098 start = None 4099 elif self._match(TokenType.START_WITH): 4100 start = self._parse_assignment() 4101 else: 4102 return None 4103 4104 self._match(TokenType.CONNECT_BY) 4105 nocycle = self._match_text_seq("NOCYCLE") 4106 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4107 exp.Prior, this=self._parse_bitwise() 4108 ) 4109 connect = self._parse_assignment() 4110 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4111 4112 if not start and self._match(TokenType.START_WITH): 4113 start = self._parse_assignment() 4114 4115 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4116 4117 def _parse_name_as_expression(self) -> exp.Alias: 4118 return self.expression( 4119 exp.Alias, 4120 alias=self._parse_id_var(any_token=True), 4121 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4122 ) 4123 4124 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4125 if self._match_text_seq("INTERPOLATE"): 4126 return self._parse_wrapped_csv(self._parse_name_as_expression) 4127 return None 4128 4129 def _parse_order( 4130 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4131 ) -> t.Optional[exp.Expression]: 4132 siblings = None 4133 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4134 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4135 return this 4136 4137 siblings = True 4138 4139 return self.expression( 4140 
exp.Order, 4141 this=this, 4142 expressions=self._parse_csv(self._parse_ordered), 4143 siblings=siblings, 4144 ) 4145 4146 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4147 if not self._match(token): 4148 return None 4149 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4150 4151 def _parse_ordered( 4152 self, parse_method: t.Optional[t.Callable] = None 4153 ) -> t.Optional[exp.Ordered]: 4154 this = parse_method() if parse_method else self._parse_assignment() 4155 if not this: 4156 return None 4157 4158 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4159 this = exp.var("ALL") 4160 4161 asc = self._match(TokenType.ASC) 4162 desc = self._match(TokenType.DESC) or (asc and False) 4163 4164 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4165 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4166 4167 nulls_first = is_nulls_first or False 4168 explicitly_null_ordered = is_nulls_first or is_nulls_last 4169 4170 if ( 4171 not explicitly_null_ordered 4172 and ( 4173 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4174 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4175 ) 4176 and self.dialect.NULL_ORDERING != "nulls_are_last" 4177 ): 4178 nulls_first = True 4179 4180 if self._match_text_seq("WITH", "FILL"): 4181 with_fill = self.expression( 4182 exp.WithFill, 4183 **{ # type: ignore 4184 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4185 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4186 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4187 "interpolate": self._parse_interpolate(), 4188 }, 4189 ) 4190 else: 4191 with_fill = None 4192 4193 return self.expression( 4194 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4195 ) 4196 4197 def _parse_limit( 4198 self, 4199 this: t.Optional[exp.Expression] = None, 4200 top: bool = False, 4201 skip_limit_token: bool = False, 
4202 ) -> t.Optional[exp.Expression]: 4203 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4204 comments = self._prev_comments 4205 if top: 4206 limit_paren = self._match(TokenType.L_PAREN) 4207 expression = self._parse_term() if limit_paren else self._parse_number() 4208 4209 if limit_paren: 4210 self._match_r_paren() 4211 else: 4212 expression = self._parse_term() 4213 4214 if self._match(TokenType.COMMA): 4215 offset = expression 4216 expression = self._parse_term() 4217 else: 4218 offset = None 4219 4220 limit_exp = self.expression( 4221 exp.Limit, 4222 this=this, 4223 expression=expression, 4224 offset=offset, 4225 comments=comments, 4226 expressions=self._parse_limit_by(), 4227 ) 4228 4229 return limit_exp 4230 4231 if self._match(TokenType.FETCH): 4232 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4233 direction = self._prev.text.upper() if direction else "FIRST" 4234 4235 count = self._parse_field(tokens=self.FETCH_TOKENS) 4236 percent = self._match(TokenType.PERCENT) 4237 4238 self._match_set((TokenType.ROW, TokenType.ROWS)) 4239 4240 only = self._match_text_seq("ONLY") 4241 with_ties = self._match_text_seq("WITH", "TIES") 4242 4243 if only and with_ties: 4244 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4245 4246 return self.expression( 4247 exp.Fetch, 4248 direction=direction, 4249 count=count, 4250 percent=percent, 4251 with_ties=with_ties, 4252 ) 4253 4254 return this 4255 4256 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4257 if not self._match(TokenType.OFFSET): 4258 return this 4259 4260 count = self._parse_term() 4261 self._match_set((TokenType.ROW, TokenType.ROWS)) 4262 4263 return self.expression( 4264 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4265 ) 4266 4267 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4268 return self._match_text_seq("BY") and 
self._parse_csv(self._parse_bitwise) 4269 4270 def _parse_locks(self) -> t.List[exp.Lock]: 4271 locks = [] 4272 while True: 4273 if self._match_text_seq("FOR", "UPDATE"): 4274 update = True 4275 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4276 "LOCK", "IN", "SHARE", "MODE" 4277 ): 4278 update = False 4279 else: 4280 break 4281 4282 expressions = None 4283 if self._match_text_seq("OF"): 4284 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4285 4286 wait: t.Optional[bool | exp.Expression] = None 4287 if self._match_text_seq("NOWAIT"): 4288 wait = True 4289 elif self._match_text_seq("WAIT"): 4290 wait = self._parse_primary() 4291 elif self._match_text_seq("SKIP", "LOCKED"): 4292 wait = False 4293 4294 locks.append( 4295 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4296 ) 4297 4298 return locks 4299 4300 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4301 while this and self._match_set(self.SET_OPERATIONS): 4302 token_type = self._prev.token_type 4303 4304 if token_type == TokenType.UNION: 4305 operation: t.Type[exp.SetOperation] = exp.Union 4306 elif token_type == TokenType.EXCEPT: 4307 operation = exp.Except 4308 else: 4309 operation = exp.Intersect 4310 4311 comments = self._prev.comments 4312 4313 if self._match(TokenType.DISTINCT): 4314 distinct: t.Optional[bool] = True 4315 elif self._match(TokenType.ALL): 4316 distinct = False 4317 else: 4318 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4319 if distinct is None: 4320 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4321 4322 by_name = self._match_text_seq("BY", "NAME") 4323 expression = self._parse_select(nested=True, parse_set_operation=False) 4324 4325 this = self.expression( 4326 operation, 4327 comments=comments, 4328 this=this, 4329 distinct=distinct, 4330 by_name=by_name, 4331 expression=expression, 4332 ) 4333 4334 if isinstance(this, 
exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4335 expression = this.expression 4336 4337 if expression: 4338 for arg in self.SET_OP_MODIFIERS: 4339 expr = expression.args.get(arg) 4340 if expr: 4341 this.set(arg, expr.pop()) 4342 4343 return this 4344 4345 def _parse_expression(self) -> t.Optional[exp.Expression]: 4346 return self._parse_alias(self._parse_assignment()) 4347 4348 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4349 this = self._parse_disjunction() 4350 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4351 # This allows us to parse <non-identifier token> := <expr> 4352 this = exp.column( 4353 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4354 ) 4355 4356 while self._match_set(self.ASSIGNMENT): 4357 if isinstance(this, exp.Column) and len(this.parts) == 1: 4358 this = this.this 4359 4360 this = self.expression( 4361 self.ASSIGNMENT[self._prev.token_type], 4362 this=this, 4363 comments=self._prev_comments, 4364 expression=self._parse_assignment(), 4365 ) 4366 4367 return this 4368 4369 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4370 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4371 4372 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4373 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4374 4375 def _parse_equality(self) -> t.Optional[exp.Expression]: 4376 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4377 4378 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4379 return self._parse_tokens(self._parse_range, self.COMPARISON) 4380 4381 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4382 this = this or self._parse_bitwise() 4383 negate = self._match(TokenType.NOT) 4384 4385 if self._match_set(self.RANGE_PARSERS): 4386 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4387 if not expression: 4388 return this 4389 4390 
this = expression 4391 elif self._match(TokenType.ISNULL): 4392 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4393 4394 # Postgres supports ISNULL and NOTNULL for conditions. 4395 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4396 if self._match(TokenType.NOTNULL): 4397 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4398 this = self.expression(exp.Not, this=this) 4399 4400 if negate: 4401 this = self._negate_range(this) 4402 4403 if self._match(TokenType.IS): 4404 this = self._parse_is(this) 4405 4406 return this 4407 4408 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4409 if not this: 4410 return this 4411 4412 return self.expression(exp.Not, this=this) 4413 4414 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4415 index = self._index - 1 4416 negate = self._match(TokenType.NOT) 4417 4418 if self._match_text_seq("DISTINCT", "FROM"): 4419 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4420 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4421 4422 if self._match(TokenType.JSON): 4423 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4424 4425 if self._match_text_seq("WITH"): 4426 _with = True 4427 elif self._match_text_seq("WITHOUT"): 4428 _with = False 4429 else: 4430 _with = None 4431 4432 unique = self._match(TokenType.UNIQUE) 4433 self._match_text_seq("KEYS") 4434 expression: t.Optional[exp.Expression] = self.expression( 4435 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4436 ) 4437 else: 4438 expression = self._parse_primary() or self._parse_null() 4439 if not expression: 4440 self._retreat(index) 4441 return None 4442 4443 this = self.expression(exp.Is, this=this, expression=expression) 4444 return self.expression(exp.Not, this=this) if negate else this 4445 4446 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> 
exp.In: 4447 unnest = self._parse_unnest(with_alias=False) 4448 if unnest: 4449 this = self.expression(exp.In, this=this, unnest=unnest) 4450 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4451 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4452 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4453 4454 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4455 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4456 else: 4457 this = self.expression(exp.In, this=this, expressions=expressions) 4458 4459 if matched_l_paren: 4460 self._match_r_paren(this) 4461 elif not self._match(TokenType.R_BRACKET, expression=this): 4462 self.raise_error("Expecting ]") 4463 else: 4464 this = self.expression(exp.In, this=this, field=self._parse_field()) 4465 4466 return this 4467 4468 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4469 low = self._parse_bitwise() 4470 self._match(TokenType.AND) 4471 high = self._parse_bitwise() 4472 return self.expression(exp.Between, this=this, low=low, high=high) 4473 4474 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4475 if not self._match(TokenType.ESCAPE): 4476 return this 4477 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4478 4479 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4480 index = self._index 4481 4482 if not self._match(TokenType.INTERVAL) and match_interval: 4483 return None 4484 4485 if self._match(TokenType.STRING, advance=False): 4486 this = self._parse_primary() 4487 else: 4488 this = self._parse_term() 4489 4490 if not this or ( 4491 isinstance(this, exp.Column) 4492 and not this.table 4493 and not this.this.quoted 4494 and this.name.upper() == "IS" 4495 ): 4496 self._retreat(index) 4497 return None 4498 4499 unit = self._parse_function() or ( 4500 not 
self._match(TokenType.ALIAS, advance=False) 4501 and self._parse_var(any_token=True, upper=True) 4502 ) 4503 4504 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4505 # each INTERVAL expression into this canonical form so it's easy to transpile 4506 if this and this.is_number: 4507 this = exp.Literal.string(this.to_py()) 4508 elif this and this.is_string: 4509 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4510 if len(parts) == 1: 4511 if unit: 4512 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4513 self._retreat(self._index - 1) 4514 4515 this = exp.Literal.string(parts[0][0]) 4516 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4517 4518 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4519 unit = self.expression( 4520 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4521 ) 4522 4523 interval = self.expression(exp.Interval, this=this, unit=unit) 4524 4525 index = self._index 4526 self._match(TokenType.PLUS) 4527 4528 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4529 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4530 return self.expression( 4531 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4532 ) 4533 4534 self._retreat(index) 4535 return interval 4536 4537 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4538 this = self._parse_term() 4539 4540 while True: 4541 if self._match_set(self.BITWISE): 4542 this = self.expression( 4543 self.BITWISE[self._prev.token_type], 4544 this=this, 4545 expression=self._parse_term(), 4546 ) 4547 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4548 this = self.expression( 4549 exp.DPipe, 4550 this=this, 4551 expression=self._parse_term(), 4552 safe=not self.dialect.STRICT_STRING_CONCAT, 4553 ) 4554 elif self._match(TokenType.DQMARK): 4555 this = self.expression( 4556 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4557 ) 4558 elif self._match_pair(TokenType.LT, TokenType.LT): 4559 this = self.expression( 4560 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4561 ) 4562 elif self._match_pair(TokenType.GT, TokenType.GT): 4563 this = self.expression( 4564 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4565 ) 4566 else: 4567 break 4568 4569 return this 4570 4571 def _parse_term(self) -> t.Optional[exp.Expression]: 4572 this = self._parse_factor() 4573 4574 while self._match_set(self.TERM): 4575 klass = self.TERM[self._prev.token_type] 4576 comments = self._prev_comments 4577 expression = self._parse_factor() 4578 4579 this = self.expression(klass, this=this, comments=comments, expression=expression) 4580 4581 if isinstance(this, exp.Collate): 4582 expr = this.expression 4583 4584 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4585 # fallback to Identifier / Var 4586 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4587 ident = expr.this 4588 if isinstance(ident, 
exp.Identifier): 4589 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4590 4591 return this 4592 4593 def _parse_factor(self) -> t.Optional[exp.Expression]: 4594 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4595 this = parse_method() 4596 4597 while self._match_set(self.FACTOR): 4598 klass = self.FACTOR[self._prev.token_type] 4599 comments = self._prev_comments 4600 expression = parse_method() 4601 4602 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4603 self._retreat(self._index - 1) 4604 return this 4605 4606 this = self.expression(klass, this=this, comments=comments, expression=expression) 4607 4608 if isinstance(this, exp.Div): 4609 this.args["typed"] = self.dialect.TYPED_DIVISION 4610 this.args["safe"] = self.dialect.SAFE_DIVISION 4611 4612 return this 4613 4614 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4615 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4616 4617 def _parse_unary(self) -> t.Optional[exp.Expression]: 4618 if self._match_set(self.UNARY_PARSERS): 4619 return self.UNARY_PARSERS[self._prev.token_type](self) 4620 return self._parse_at_time_zone(self._parse_type()) 4621 4622 def _parse_type( 4623 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4624 ) -> t.Optional[exp.Expression]: 4625 interval = parse_interval and self._parse_interval() 4626 if interval: 4627 return interval 4628 4629 index = self._index 4630 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4631 4632 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
4633 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4634 if isinstance(data_type, exp.Cast): 4635 # This constructor can contain ops directly after it, for instance struct unnesting: 4636 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4637 return self._parse_column_ops(data_type) 4638 4639 if data_type: 4640 index2 = self._index 4641 this = self._parse_primary() 4642 4643 if isinstance(this, exp.Literal): 4644 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4645 if parser: 4646 return parser(self, this, data_type) 4647 4648 return self.expression(exp.Cast, this=this, to=data_type) 4649 4650 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4651 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4652 # 4653 # If the index difference here is greater than 1, that means the parser itself must have 4654 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4655 # 4656 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4657 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4658 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4659 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4660 # 4661 # In these cases, we don't really want to return the converted type, but instead retreat 4662 # and try to parse a Column or Identifier in the section below. 
4663 if data_type.expressions and index2 - index > 1: 4664 self._retreat(index2) 4665 return self._parse_column_ops(data_type) 4666 4667 self._retreat(index) 4668 4669 if fallback_to_identifier: 4670 return self._parse_id_var() 4671 4672 this = self._parse_column() 4673 return this and self._parse_column_ops(this) 4674 4675 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4676 this = self._parse_type() 4677 if not this: 4678 return None 4679 4680 if isinstance(this, exp.Column) and not this.table: 4681 this = exp.var(this.name.upper()) 4682 4683 return self.expression( 4684 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4685 ) 4686 4687 def _parse_types( 4688 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4689 ) -> t.Optional[exp.Expression]: 4690 index = self._index 4691 4692 this: t.Optional[exp.Expression] = None 4693 prefix = self._match_text_seq("SYSUDTLIB", ".") 4694 4695 if not self._match_set(self.TYPE_TOKENS): 4696 identifier = allow_identifiers and self._parse_id_var( 4697 any_token=False, tokens=(TokenType.VAR,) 4698 ) 4699 if isinstance(identifier, exp.Identifier): 4700 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4701 4702 if len(tokens) != 1: 4703 self.raise_error("Unexpected identifier", self._prev) 4704 4705 if tokens[0].token_type in self.TYPE_TOKENS: 4706 self._prev = tokens[0] 4707 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4708 type_name = identifier.name 4709 4710 while self._match(TokenType.DOT): 4711 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4712 4713 this = exp.DataType.build(type_name, udt=True) 4714 else: 4715 self._retreat(self._index - 1) 4716 return None 4717 else: 4718 return None 4719 4720 type_token = self._prev.token_type 4721 4722 if type_token == TokenType.PSEUDO_TYPE: 4723 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4724 4725 if type_token == TokenType.OBJECT_IDENTIFIER: 4726 
return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4727 4728 # https://materialize.com/docs/sql/types/map/ 4729 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4730 key_type = self._parse_types( 4731 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4732 ) 4733 if not self._match(TokenType.FARROW): 4734 self._retreat(index) 4735 return None 4736 4737 value_type = self._parse_types( 4738 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4739 ) 4740 if not self._match(TokenType.R_BRACKET): 4741 self._retreat(index) 4742 return None 4743 4744 return exp.DataType( 4745 this=exp.DataType.Type.MAP, 4746 expressions=[key_type, value_type], 4747 nested=True, 4748 prefix=prefix, 4749 ) 4750 4751 nested = type_token in self.NESTED_TYPE_TOKENS 4752 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4753 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4754 expressions = None 4755 maybe_func = False 4756 4757 if self._match(TokenType.L_PAREN): 4758 if is_struct: 4759 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4760 elif nested: 4761 expressions = self._parse_csv( 4762 lambda: self._parse_types( 4763 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4764 ) 4765 ) 4766 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4767 this = expressions[0] 4768 this.set("nullable", True) 4769 self._match_r_paren() 4770 return this 4771 elif type_token in self.ENUM_TYPE_TOKENS: 4772 expressions = self._parse_csv(self._parse_equality) 4773 elif is_aggregate: 4774 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4775 any_token=False, tokens=(TokenType.VAR,) 4776 ) 4777 if not func_or_ident or not self._match(TokenType.COMMA): 4778 return None 4779 expressions = self._parse_csv( 4780 lambda: self._parse_types( 4781 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4782 ) 
4783 ) 4784 expressions.insert(0, func_or_ident) 4785 else: 4786 expressions = self._parse_csv(self._parse_type_size) 4787 4788 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4789 if type_token == TokenType.VECTOR and len(expressions) == 2: 4790 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4791 4792 if not expressions or not self._match(TokenType.R_PAREN): 4793 self._retreat(index) 4794 return None 4795 4796 maybe_func = True 4797 4798 values: t.Optional[t.List[exp.Expression]] = None 4799 4800 if nested and self._match(TokenType.LT): 4801 if is_struct: 4802 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4803 else: 4804 expressions = self._parse_csv( 4805 lambda: self._parse_types( 4806 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4807 ) 4808 ) 4809 4810 if not self._match(TokenType.GT): 4811 self.raise_error("Expecting >") 4812 4813 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4814 values = self._parse_csv(self._parse_assignment) 4815 if not values and is_struct: 4816 values = None 4817 self._retreat(self._index - 1) 4818 else: 4819 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4820 4821 if type_token in self.TIMESTAMPS: 4822 if self._match_text_seq("WITH", "TIME", "ZONE"): 4823 maybe_func = False 4824 tz_type = ( 4825 exp.DataType.Type.TIMETZ 4826 if type_token in self.TIMES 4827 else exp.DataType.Type.TIMESTAMPTZ 4828 ) 4829 this = exp.DataType(this=tz_type, expressions=expressions) 4830 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4831 maybe_func = False 4832 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4833 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4834 maybe_func = False 4835 elif type_token == TokenType.INTERVAL: 4836 unit = self._parse_var(upper=True) 4837 if unit: 4838 if self._match_text_seq("TO"): 4839 unit = exp.IntervalSpan(this=unit, 
expression=self._parse_var(upper=True)) 4840 4841 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4842 else: 4843 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4844 4845 if maybe_func and check_func: 4846 index2 = self._index 4847 peek = self._parse_string() 4848 4849 if not peek: 4850 self._retreat(index) 4851 return None 4852 4853 self._retreat(index2) 4854 4855 if not this: 4856 if self._match_text_seq("UNSIGNED"): 4857 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4858 if not unsigned_type_token: 4859 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4860 4861 type_token = unsigned_type_token or type_token 4862 4863 this = exp.DataType( 4864 this=exp.DataType.Type[type_token.value], 4865 expressions=expressions, 4866 nested=nested, 4867 prefix=prefix, 4868 ) 4869 4870 # Empty arrays/structs are allowed 4871 if values is not None: 4872 cls = exp.Struct if is_struct else exp.Array 4873 this = exp.cast(cls(expressions=values), this, copy=False) 4874 4875 elif expressions: 4876 this.set("expressions", expressions) 4877 4878 # https://materialize.com/docs/sql/types/list/#type-name 4879 while self._match(TokenType.LIST): 4880 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4881 4882 index = self._index 4883 4884 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4885 matched_array = self._match(TokenType.ARRAY) 4886 4887 while self._curr: 4888 datatype_token = self._prev.token_type 4889 matched_l_bracket = self._match(TokenType.L_BRACKET) 4890 if not matched_l_bracket and not matched_array: 4891 break 4892 4893 matched_array = False 4894 values = self._parse_csv(self._parse_assignment) or None 4895 if ( 4896 values 4897 and not schema 4898 and ( 4899 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4900 ) 4901 ): 4902 # Retreating here means that we should not parse the following 
                # values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        # Give the dialect a final chance to rewrite the parsed type (e.g. type aliases)
        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single STRUCT member, i.e. an (optional) name followed by a type.

        When `type_required` is True and no type could be parsed for `this`, the
        parser backtracks and retries the whole span as a bare type.
        """
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        # Optional `name: type` separator (e.g. DuckDB struct syntax)
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE <zone> if the clause follows; otherwise pass through."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference along with any trailing column operators (dots, casts, brackets)."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        # Oracle-style (+) outer-join marker, only for dialects that support it
        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference, promoting a parsed Identifier into a Column node."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # `VALUES` used as a plain identifier (not followed by a parenthesized tuple list)
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path.to.key` VARIANT extraction into exp.JSONExtract.

        Trailing `::type` casts bind more loosely than `:`, so they are peeled off the
        parsed path and re-applied around the resulting JSONExtract node.
        """
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Recover the token just before the first `::` so the raw path SQL can be sliced out
                # NOTE(review): the loop variable `t` shadows the module-level `typing as t` import
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            # Re-apply the peeled-off casts, innermost first
            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this
    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of the `::` cast operator (a type)."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Repeatedly apply column operators (e.g. `.`, `::`, brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dotted reference: shift the existing name parts one level up
                # (column -> table -> db -> catalog) and make `field` the new column
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, `.N` number, or a parenthesized
        expression / tuple / subquery. Returns None if nothing matches.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. `.5` -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                # `()` - empty tuple
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable.

        `anonymous_func` flips the precedence so function parsing is tried before
        primaries (needed when a function name could also be a literal-like token).
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC `{fn ...}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly parenthesis-less) function call into a typed Func node,
        a dialect-specific parse, a subquery predicate, or an Anonymous function.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: force an exp.Anonymous node instead of a typed builder.
            optional_parens: allow functions that take no parentheses (e.g. CURRENT_DATE).
            any_token: allow any non-reserved token as the function name.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Functions with dedicated parsers that do not require parentheses
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                # Normalize aliased args (e.g. STRUCT(1 AS x)) into PropertyEQ nodes
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # NOTE(review): builder signatures are introspected for a `dialect`
                # parameter via __code__.co_varnames
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling so it can be round-tripped
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to rewrite a non key-value argument; identity by default."""
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, EQ-likes) into exp.PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                # The key should be a bare identifier, not a Column wrapper
                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single UDF parameter as a column definition (name [+ type/constraints])."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL `_utf8'abc'`); fall back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter (an identifier by default; dialects may override)."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`x -> ...` / `(x, y) -> ...`) or, failing that, backtrack and
        parse a DISTINCT list or a select/expression with trailing order/limit clauses.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all - rewind and parse as a regular expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field definition: any-token field name followed by its column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and constraint list after a column name.

        Returns `this` unchanged when there's no type and no constraints, so the
        caller can treat it as a plain identifier.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse's ALIAS / MATERIALIZED
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_field()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; backtrack over AUTO if REFRESH doesn't follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | (expr)} constraints.

        `this=True` on the identity constraint marks ALWAYS; `this=False` marks BY DEFAULT.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # System-versioned temporal table: GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) - a computed expression, not identity options
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Shorthand positional form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT (NULL / CASESPECIFIC / FOR REPLICATION).

        Returns None (and rewinds past NOT) when none of the known forms follow.
        """
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed constraints are delegated when CONSTRAINT is absent."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint whose keyword is in `constraints`
        (defaults to all of CONSTRAINT_PARSERS). Identifiers are never constraints.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse the (optional) name of a UNIQUE key."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with optional NULLS NOT DISTINCT,
        column list, USING <index type> and ON CONFLICT clause.
        """
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into raw strings.

        Each `ON <event> <action>` clause becomes "ON <event> <action>"; other
        recognized keywords come from KEY_CONSTRAINT_OPTIONS.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; the referenced columns are captured by
        `_parse_table(schema=True)`, so `expressions` stays None here.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint, including ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); rewind when SYSTEM_TIME doesn't follow."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint (no column list) or
        as a table-level constraint with a wrapped column list and options.
        """
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one bracketed element: an assignment with an optional explicit alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression
    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` after `this`: subscripting, array/struct literals,
        ODBC datetime literals, or dialect-specific array constructors. Recurses to
        handle chained brackets.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Plain subscript: adjust indices for the dialect's base index offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a slice suffix `:<expr>` after `this`, if present."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `END` may have been swallowed as an INTERVAL unit, e.g. `... ELSE interval END`
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call `IF(...)` or as the statement-like
        `IF cond THEN ... [ELSE ...] END` form.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command (e.g. T-SQL IF statements)
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this
    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; rewind if it's plain NEXT."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(part FROM expr); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>) into a validated exp.GapFill."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        Args:
            strict: build exp.Cast when True, exp.TryCast otherwise.
            safe: propagated to the resulting node (e.g. for SAFE_CAST semantics).

        A FORMAT clause on a temporal target type is rewritten into
        StrToDate/StrToTime with the dialect's time-format mapping applied.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # User-defined type name
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including the
        Postgres/BigQuery order-and-limit-in-args forms and WITHIN GROUP.
        """
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments into Cast/TryCast.

        Supports both `CONVERT(expr USING charset)` and `CONVERT(expr, type)`;
        with neither USING nor a comma, `to` is left as None.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            # Target is a character set rather than a data type
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        # Fewer than 3 args can only be the (bin, charset) variant
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired element is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats NULL search values as matching NULL operands
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] k <sep> [VALUE] v` entry of a JSON object constructor."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when followed by the FORMAT JSON keywords."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        """Parse the `... ON EMPTY / ... ON ERROR / ... ON NULL` handling clauses."""
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL).
        # Returns the matched "X ON Y" string, the DEFAULT expression, or None (after rewinding).
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse the argument list of JSON_OBJECT / JSON_OBJECTAGG (name and `(` already consumed).

        Args:
            agg: build exp.JSONObjectAgg when True, exp.JSONObject otherwise.
        """
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # [NULL | ABSENT] ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS]; None when the clause is absent
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition of a JSON_TABLE COLUMNS clause (plain or NESTED)."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a `COLUMNS (<json column defs>)` clause into exp.JSONSchema."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE arguments: source expr, optional path, ON ERROR/EMPTY handling, COLUMNS schema."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH (cols) AGAINST (str [modifier]) full-text search syntax."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type ['path'] [AS JSON]` entry of the WITH clause
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments.

        Args:
            haystack_first: argument order of the comma form is (haystack, needle)
                when True, (needle, haystack) otherwise. The `needle IN haystack`
                form is handled regardless.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse `PREDICT(MODEL m, TABLE t [, params])` into exp.Predict."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join-hint argument list (tables) into exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            # FOR with no FROM: the start position defaults to 1
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # In `TRIM(chars FROM str)` the operands are pattern-first; swap to normalize
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a `WINDOW name AS (...), ...` clause; None/falsy when absent."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX|MIN expr` qualifier into exp.HavingMax."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and the OVER (...) specification.

        Args:
            this: the function (or window name) being windowed.
            alias: True when parsing a named WINDOW clause entry (`name AS (...)`).
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the nested IGNORE/RESPECT NULLS so it wraps the aggregate itself
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like token: `this` is not windowed after all
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # Comments are moved onto the Window node; avoid duplicating them on the function
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window, no inline spec)
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound (UNBOUNDED / CURRENT ROW / expr, plus PRECEDING/FOLLOWING side)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`.

        Args:
            explicit: when True, only accept an alias introduced by AS.

        Handles the multi-alias form `expr AS (a, b, ...)` and (when the dialect
        enables STRING_ALIASES) string literals used as aliases.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = 
self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via STRING_PARSERS; fall back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier; None when no string follows."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via NUMERIC_PARSERS; fall back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a (quoted) IDENTIFIER token; fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword-like token into exp.Var.

        Args:
            any_token: accept any non-reserved token.
            tokens: additional token types to accept.
            upper: uppercase the resulting text.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (or None at EOF)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal or, failing that, any token as a Var."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression or, failing that, any token as a Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal; fall back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE literals; fall back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse `*`; fall back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter marker's name/value into exp.Parameter."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a bind placeholder via PLACEHOLDER_PARSERS; rewinds if the sub-parser declines."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Sub-parser declined: give the consumed token back
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse `KEYWORD expr` or `KEYWORD (expr, ...)` (e.g. star modifiers); None when absent."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments preceding the separator belong to the previous item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: token type -> expression class mapping."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list (parens optional when `optional`)."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside `( ... )`; error on a missing `(` unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT or, failing that, an expression (with set-operation suffixes)."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CTAS), incl. modifiers and set ops."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode, ...]."""
        this = None
        if 
self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Transaction modes: comma-separated groups of space-joined VAR tokens
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] sp] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): AND [NO] CHAIN is consumed for ROLLBACK too, but only Commit
        # records `chain` — confirm whether dropping it for Rollback is intentional.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string | table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one `ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER col]` action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action, defaulting its kind to COLUMN when unspecified."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of an ALTER TABLE ... DROP PARTITION action."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of ALTER TABLE: constraints, columns, or a schema."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # Single leading ADD followed by a (possibly wrapped) column list
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE's ALTER [COLUMN] action, dispatching to ALTER_ALTER_PARSERS first."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER ... DISTSTYLE {ALL | EVEN | AUTO | KEY DISTKEY col}."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER ... [COMPOUND] SORTKEY {(cols) | AUTO | NONE}."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> 
t.Optional[exp.RenameTable | exp.RenameColumn]: 6756 if self._match(TokenType.COLUMN): 6757 exists = self._parse_exists() 6758 old_column = self._parse_column() 6759 to = self._match_text_seq("TO") 6760 new_column = self._parse_column() 6761 6762 if old_column is None or to is None or new_column is None: 6763 return None 6764 6765 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6766 6767 self._match_text_seq("TO") 6768 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6769 6770 def _parse_alter_table_set(self) -> exp.AlterSet: 6771 alter_set = self.expression(exp.AlterSet) 6772 6773 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6774 "TABLE", "PROPERTIES" 6775 ): 6776 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6777 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6778 alter_set.set("expressions", [self._parse_assignment()]) 6779 elif self._match_texts(("LOGGED", "UNLOGGED")): 6780 alter_set.set("option", exp.var(self._prev.text.upper())) 6781 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6782 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6783 elif self._match_text_seq("LOCATION"): 6784 alter_set.set("location", self._parse_field()) 6785 elif self._match_text_seq("ACCESS", "METHOD"): 6786 alter_set.set("access_method", self._parse_field()) 6787 elif self._match_text_seq("TABLESPACE"): 6788 alter_set.set("tablespace", self._parse_field()) 6789 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6790 alter_set.set("file_format", [self._parse_field()]) 6791 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6792 alter_set.set("file_format", self._parse_wrapped_options()) 6793 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6794 alter_set.set("copy_options", self._parse_wrapped_options()) 6795 elif self._match_text_seq("TAG") or 
self._match_text_seq("TAGS"): 6796 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6797 else: 6798 if self._match_text_seq("SERDE"): 6799 alter_set.set("serde", self._parse_field()) 6800 6801 alter_set.set("expressions", [self._parse_properties()]) 6802 6803 return alter_set 6804 6805 def _parse_alter(self) -> exp.Alter | exp.Command: 6806 start = self._prev 6807 6808 alter_token = self._match_set(self.ALTERABLES) and self._prev 6809 if not alter_token: 6810 return self._parse_as_command(start) 6811 6812 exists = self._parse_exists() 6813 only = self._match_text_seq("ONLY") 6814 this = self._parse_table(schema=True) 6815 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6816 6817 if self._next: 6818 self._advance() 6819 6820 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6821 if parser: 6822 actions = ensure_list(parser(self)) 6823 not_valid = self._match_text_seq("NOT", "VALID") 6824 options = self._parse_csv(self._parse_property) 6825 6826 if not self._curr and actions: 6827 return self.expression( 6828 exp.Alter, 6829 this=this, 6830 kind=alter_token.text.upper(), 6831 exists=exists, 6832 actions=actions, 6833 only=only, 6834 options=options, 6835 cluster=cluster, 6836 not_valid=not_valid, 6837 ) 6838 6839 return self._parse_as_command(start) 6840 6841 def _parse_merge(self) -> exp.Merge: 6842 self._match(TokenType.INTO) 6843 target = self._parse_table() 6844 6845 if target and self._match(TokenType.ALIAS, advance=False): 6846 target.set("alias", self._parse_table_alias()) 6847 6848 self._match(TokenType.USING) 6849 using = self._parse_table() 6850 6851 self._match(TokenType.ON) 6852 on = self._parse_assignment() 6853 6854 return self.expression( 6855 exp.Merge, 6856 this=target, 6857 using=using, 6858 on=on, 6859 expressions=self._parse_when_matched(), 6860 returning=self._parse_returning(), 6861 ) 6862 6863 def _parse_when_matched(self) -> t.List[exp.When]: 6864 whens = [] 6865 6866 while 
self._match(TokenType.WHEN): 6867 matched = not self._match(TokenType.NOT) 6868 self._match_text_seq("MATCHED") 6869 source = ( 6870 False 6871 if self._match_text_seq("BY", "TARGET") 6872 else self._match_text_seq("BY", "SOURCE") 6873 ) 6874 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6875 6876 self._match(TokenType.THEN) 6877 6878 if self._match(TokenType.INSERT): 6879 this = self._parse_star() 6880 if this: 6881 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6882 else: 6883 then = self.expression( 6884 exp.Insert, 6885 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6886 expression=self._match_text_seq("VALUES") and self._parse_value(), 6887 ) 6888 elif self._match(TokenType.UPDATE): 6889 expressions = self._parse_star() 6890 if expressions: 6891 then = self.expression(exp.Update, expressions=expressions) 6892 else: 6893 then = self.expression( 6894 exp.Update, 6895 expressions=self._match(TokenType.SET) 6896 and self._parse_csv(self._parse_equality), 6897 ) 6898 elif self._match(TokenType.DELETE): 6899 then = self.expression(exp.Var, this=self._prev.text) 6900 else: 6901 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6902 6903 whens.append( 6904 self.expression( 6905 exp.When, 6906 matched=matched, 6907 source=source, 6908 condition=condition, 6909 then=then, 6910 ) 6911 ) 6912 return whens 6913 6914 def _parse_show(self) -> t.Optional[exp.Expression]: 6915 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6916 if parser: 6917 return parser(self) 6918 return self._parse_as_command(self._prev) 6919 6920 def _parse_set_item_assignment( 6921 self, kind: t.Optional[str] = None 6922 ) -> t.Optional[exp.Expression]: 6923 index = self._index 6924 6925 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6926 return self._parse_set_transaction(global_=kind == "GLOBAL") 6927 6928 left = self._parse_primary() or self._parse_column() 6929 
assignment_delimiter = self._match_texts(("=", "TO")) 6930 6931 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6932 self._retreat(index) 6933 return None 6934 6935 right = self._parse_statement() or self._parse_id_var() 6936 if isinstance(right, (exp.Column, exp.Identifier)): 6937 right = exp.var(right.name) 6938 6939 this = self.expression(exp.EQ, this=left, expression=right) 6940 return self.expression(exp.SetItem, this=this, kind=kind) 6941 6942 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6943 self._match_text_seq("TRANSACTION") 6944 characteristics = self._parse_csv( 6945 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6946 ) 6947 return self.expression( 6948 exp.SetItem, 6949 expressions=characteristics, 6950 kind="TRANSACTION", 6951 **{"global": global_}, # type: ignore 6952 ) 6953 6954 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6955 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6956 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6957 6958 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6959 index = self._index 6960 set_ = self.expression( 6961 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6962 ) 6963 6964 if self._curr: 6965 self._retreat(index) 6966 return self._parse_as_command(self._prev) 6967 6968 return set_ 6969 6970 def _parse_var_from_options( 6971 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6972 ) -> t.Optional[exp.Var]: 6973 start = self._curr 6974 if not start: 6975 return None 6976 6977 option = start.text.upper() 6978 continuations = options.get(option) 6979 6980 index = self._index 6981 self._advance() 6982 for keywords in continuations or []: 6983 if isinstance(keywords, str): 6984 keywords = (keywords,) 6985 6986 if self._match_text_seq(*keywords): 6987 option = f"{option} {' '.join(keywords)}" 6988 break 
6989 else: 6990 if continuations or continuations is None: 6991 if raise_unmatched: 6992 self.raise_error(f"Unknown option {option}") 6993 6994 self._retreat(index) 6995 return None 6996 6997 return exp.var(option) 6998 6999 def _parse_as_command(self, start: Token) -> exp.Command: 7000 while self._curr: 7001 self._advance() 7002 text = self._find_sql(start, self._prev) 7003 size = len(start.text) 7004 self._warn_unsupported() 7005 return exp.Command(this=text[:size], expression=text[size:]) 7006 7007 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7008 settings = [] 7009 7010 self._match_l_paren() 7011 kind = self._parse_id_var() 7012 7013 if self._match(TokenType.L_PAREN): 7014 while True: 7015 key = self._parse_id_var() 7016 value = self._parse_primary() 7017 7018 if not key and value is None: 7019 break 7020 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7021 self._match(TokenType.R_PAREN) 7022 7023 self._match_r_paren() 7024 7025 return self.expression( 7026 exp.DictProperty, 7027 this=this, 7028 kind=kind.this if kind else None, 7029 settings=settings, 7030 ) 7031 7032 def _parse_dict_range(self, this: str) -> exp.DictRange: 7033 self._match_l_paren() 7034 has_min = self._match_text_seq("MIN") 7035 if has_min: 7036 min = self._parse_var() or self._parse_primary() 7037 self._match_text_seq("MAX") 7038 max = self._parse_var() or self._parse_primary() 7039 else: 7040 max = self._parse_var() or self._parse_primary() 7041 min = exp.Literal.number(0) 7042 self._match_r_paren() 7043 return self.expression(exp.DictRange, this=this, min=min, max=max) 7044 7045 def _parse_comprehension( 7046 self, this: t.Optional[exp.Expression] 7047 ) -> t.Optional[exp.Comprehension]: 7048 index = self._index 7049 expression = self._parse_column() 7050 if not self._match(TokenType.IN): 7051 self._retreat(index - 1) 7052 return None 7053 iterator = self._parse_column() 7054 condition = self._parse_assignment() if self._match_text_seq("IF") 
else None 7055 return self.expression( 7056 exp.Comprehension, 7057 this=this, 7058 expression=expression, 7059 iterator=iterator, 7060 condition=condition, 7061 ) 7062 7063 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7064 if self._match(TokenType.HEREDOC_STRING): 7065 return self.expression(exp.Heredoc, this=self._prev.text) 7066 7067 if not self._match_text_seq("$"): 7068 return None 7069 7070 tags = ["$"] 7071 tag_text = None 7072 7073 if self._is_connected(): 7074 self._advance() 7075 tags.append(self._prev.text.upper()) 7076 else: 7077 self.raise_error("No closing $ found") 7078 7079 if tags[-1] != "$": 7080 if self._is_connected() and self._match_text_seq("$"): 7081 tag_text = tags[-1] 7082 tags.append("$") 7083 else: 7084 self.raise_error("No closing $ found") 7085 7086 heredoc_start = self._curr 7087 7088 while self._curr: 7089 if self._match_text_seq(*tags, advance=False): 7090 this = self._find_sql(heredoc_start, self._prev) 7091 self._advance(len(tags)) 7092 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7093 7094 self._advance() 7095 7096 self.raise_error(f"No closing {''.join(tags)} found") 7097 return None 7098 7099 def _find_parser( 7100 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7101 ) -> t.Optional[t.Callable]: 7102 if not self._curr: 7103 return None 7104 7105 index = self._index 7106 this = [] 7107 while True: 7108 # The current token might be multiple words 7109 curr = self._curr.text.upper() 7110 key = curr.split(" ") 7111 this.append(curr) 7112 7113 self._advance() 7114 result, trie = in_trie(trie, key) 7115 if result == TrieResult.FAILED: 7116 break 7117 7118 if result == TrieResult.EXISTS: 7119 subparser = parsers[" ".join(this)] 7120 return subparser 7121 7122 self._retreat(index) 7123 return None 7124 7125 def _match(self, token_type, advance=True, expression=None): 7126 if not self._curr: 7127 return None 7128 7129 if self._curr.token_type == token_type: 7130 if advance: 7131 self._advance() 7132 
self._add_comments(expression) 7133 return True 7134 7135 return None 7136 7137 def _match_set(self, types, advance=True): 7138 if not self._curr: 7139 return None 7140 7141 if self._curr.token_type in types: 7142 if advance: 7143 self._advance() 7144 return True 7145 7146 return None 7147 7148 def _match_pair(self, token_type_a, token_type_b, advance=True): 7149 if not self._curr or not self._next: 7150 return None 7151 7152 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7153 if advance: 7154 self._advance(2) 7155 return True 7156 7157 return None 7158 7159 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7160 if not self._match(TokenType.L_PAREN, expression=expression): 7161 self.raise_error("Expecting (") 7162 7163 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7164 if not self._match(TokenType.R_PAREN, expression=expression): 7165 self.raise_error("Expecting )") 7166 7167 def _match_texts(self, texts, advance=True): 7168 if ( 7169 self._curr 7170 and self._curr.token_type != TokenType.STRING 7171 and self._curr.text.upper() in texts 7172 ): 7173 if advance: 7174 self._advance() 7175 return True 7176 return None 7177 7178 def _match_text_seq(self, *texts, advance=True): 7179 index = self._index 7180 for text in texts: 7181 if ( 7182 self._curr 7183 and self._curr.token_type != TokenType.STRING 7184 and self._curr.text.upper() == text 7185 ): 7186 self._advance() 7187 else: 7188 self._retreat(index) 7189 return None 7190 7191 if not advance: 7192 self._retreat(index) 7193 7194 return True 7195 7196 def _replace_lambda( 7197 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7198 ) -> t.Optional[exp.Expression]: 7199 if not node: 7200 return node 7201 7202 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7203 7204 for column in node.find_all(exp.Column): 7205 typ = lambda_types.get(column.parts[0].name) 7206 if typ 
is not None: 7207 dot_or_id = column.to_dot() if column.table else column.this 7208 7209 if typ: 7210 dot_or_id = self.expression( 7211 exp.Cast, 7212 this=dot_or_id, 7213 to=typ, 7214 ) 7215 7216 parent = column.parent 7217 7218 while isinstance(parent, exp.Dot): 7219 if not isinstance(parent.parent, exp.Dot): 7220 parent.replace(dot_or_id) 7221 break 7222 parent = parent.parent 7223 else: 7224 if column is node: 7225 node = dot_or_id 7226 else: 7227 column.replace(dot_or_id) 7228 return node 7229 7230 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7231 start = self._prev 7232 7233 # Not to be confused with TRUNCATE(number, decimals) function call 7234 if self._match(TokenType.L_PAREN): 7235 self._retreat(self._index - 2) 7236 return self._parse_function() 7237 7238 # Clickhouse supports TRUNCATE DATABASE as well 7239 is_database = self._match(TokenType.DATABASE) 7240 7241 self._match(TokenType.TABLE) 7242 7243 exists = self._parse_exists(not_=False) 7244 7245 expressions = self._parse_csv( 7246 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7247 ) 7248 7249 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7250 7251 if self._match_text_seq("RESTART", "IDENTITY"): 7252 identity = "RESTART" 7253 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7254 identity = "CONTINUE" 7255 else: 7256 identity = None 7257 7258 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7259 option = self._prev.text 7260 else: 7261 option = None 7262 7263 partition = self._parse_partition() 7264 7265 # Fallback case 7266 if self._curr: 7267 return self._parse_as_command(start) 7268 7269 return self.expression( 7270 exp.TruncateTable, 7271 expressions=expressions, 7272 is_database=is_database, 7273 exists=exists, 7274 cluster=cluster, 7275 identity=identity, 7276 option=option, 7277 partition=partition, 7278 ) 7279 7280 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7281 
this = self._parse_ordered(self._parse_opclass) 7282 7283 if not self._match(TokenType.WITH): 7284 return this 7285 7286 op = self._parse_var(any_token=True) 7287 7288 return self.expression(exp.WithOperator, this=this, op=op) 7289 7290 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7291 self._match(TokenType.EQ) 7292 self._match(TokenType.L_PAREN) 7293 7294 opts: t.List[t.Optional[exp.Expression]] = [] 7295 while self._curr and not self._match(TokenType.R_PAREN): 7296 if self._match_text_seq("FORMAT_NAME", "="): 7297 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7298 # so we parse it separately to use _parse_field() 7299 prop = self.expression( 7300 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7301 ) 7302 opts.append(prop) 7303 else: 7304 opts.append(self._parse_property()) 7305 7306 self._match(TokenType.COMMA) 7307 7308 return opts 7309 7310 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7311 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7312 7313 options = [] 7314 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7315 option = self._parse_var(any_token=True) 7316 prev = self._prev.text.upper() 7317 7318 # Different dialects might separate options and values by white space, "=" and "AS" 7319 self._match(TokenType.EQ) 7320 self._match(TokenType.ALIAS) 7321 7322 param = self.expression(exp.CopyParameter, this=option) 7323 7324 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7325 TokenType.L_PAREN, advance=False 7326 ): 7327 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7328 param.set("expressions", self._parse_wrapped_options()) 7329 elif prev == "FILE_FORMAT": 7330 # T-SQL's external file format case 7331 param.set("expression", self._parse_field()) 7332 else: 7333 param.set("expression", self._parse_unquoted_field()) 7334 7335 options.append(param) 7336 self._match(sep) 7337 7338 return options 7339 7340 
def _parse_credentials(self) -> t.Optional[exp.Credentials]:
    """Parse COPY credential clauses (storage integration, credentials,
    encryption, IAM role, region) into a Credentials node."""
    expr = self.expression(exp.Credentials)

    if self._match_text_seq("STORAGE_INTEGRATION", "="):
        expr.set("storage", self._parse_field())
    if self._match_text_seq("CREDENTIALS"):
        # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
        creds = (
            self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
        )
        expr.set("credentials", creds)
    if self._match_text_seq("ENCRYPTION"):
        expr.set("encryption", self._parse_wrapped_options())
    if self._match_text_seq("IAM_ROLE"):
        expr.set("iam_role", self._parse_field())
    if self._match_text_seq("REGION"):
        expr.set("region", self._parse_field())

    return expr

def _parse_file_location(self) -> t.Optional[exp.Expression]:
    """Parse a file location for COPY; dialects may override this hook."""
    return self._parse_field()

def _parse_copy(self) -> exp.Copy | exp.Command:
    """Parse COPY INTO <target> FROM/TO <files> with credentials and
    parameters, falling back to a raw Command if tokens remain."""
    start = self._prev

    self._match(TokenType.INTO)

    this = (
        self._parse_select(nested=True, parse_subquery_alias=False)
        if self._match(TokenType.L_PAREN, advance=False)
        else self._parse_table(schema=True)
    )

    # kind is True for COPY ... FROM, False for COPY ... TO
    kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

    files = self._parse_csv(self._parse_file_location)
    credentials = self._parse_credentials()

    self._match_text_seq("WITH")

    params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Copy,
        this=this,
        kind=kind,
        credentials=credentials,
        files=files,
        params=params,
    )

def _parse_normalize(self) -> exp.Normalize:
    """Parse NORMALIZE(<expr> [, <form>])."""
    return self.expression(
        exp.Normalize,
        this=self._parse_bitwise(),
        form=self._match(TokenType.COMMA) and self._parse_var(),
    )

def _parse_star_ops(self) -> exp.Star | exp.UnpackColumns:
    """Parse '*' projections: COLUMNS(...) unpacking, or star with
    EXCEPT/EXCLUDE, REPLACE and RENAME modifiers."""
    if self._match_text_seq("COLUMNS", "(", advance=False):
        return exp.UnpackColumns(this=self._parse_function())

    return self.expression(
        exp.Star,
        **{  # type: ignore
            "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
            "replace": self._parse_star_op("REPLACE"),
            "rename": self._parse_star_op("RENAME"),
        },
    )

def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
    """Parse one GRANT privilege, including any trailing column list."""
    privilege_parts = []

    # Keep consuming consecutive keywords until comma (end of this privilege) or ON
    # (end of privilege list) or L_PAREN (start of column list) are met
    while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
        privilege_parts.append(self._curr.text.upper())
        self._advance()

    this = exp.var(" ".join(privilege_parts))
    expressions = (
        self._parse_wrapped_csv(self._parse_column)
        if self._match(TokenType.L_PAREN, advance=False)
        else None
    )

    return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
    """Parse a grantee ([ROLE | GROUP] <identifier>)."""
    kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
    principal = self._parse_id_var()

    if not principal:
        return None

    return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

def _parse_grant(self) -> exp.Grant | exp.Command:
    """Parse GRANT <privileges> ON <securable> TO <principals>, falling back
    to a raw Command when the securable or remainder cannot be parsed."""
    start = self._prev

    privileges = self._parse_csv(self._parse_grant_privilege)

    self._match(TokenType.ON)
    kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

    # Attempt to parse the securable e.g. MySQL allows names
    # such as "foo.*", "*.*" which are not easily parseable yet
    securable = self._try_parse(self._parse_table_parts)

    if not securable or not self._match_text_seq("TO"):
        return self._parse_as_command(start)

    principals = self._parse_csv(self._parse_grant_principal)

    grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Grant,
        privileges=privileges,
        kind=kind,
        securable=securable,
        principals=principals,
        grant_option=grant_option,
    )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from flat alternating key/value arguments.

    A single star argument yields a StarMap; otherwise even-indexed args
    become keys and odd-indexed args become values of a VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser for binary range predicates of type `expr_type`.

    The produced callable parses the right-hand side, optionally swaps the
    operands (for dialects with reversed argument order), and handles a
    trailing ESCAPE clause.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        rhs = self._parse_bitwise()
        lhs = this
        if reverse_args:
            lhs, rhs = rhs, lhs

        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) expression, honoring the dialect's argument order
    and its default behavior for single-argument LOG."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        if not dialect.LOG_BASE_FIRST:
            base, value = value, base
        return exp.Log(this=base, expression=value)

    # Single-argument LOG: some dialects treat it as natural log
    single_arg_cls = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
    return single_arg_cls(this=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON extraction functions of type `expr_type`,
    converting the second argument into a dialect-specific JSON path."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        # JSON_EXTRACT may carry additional path arguments beyond the first two
        extra_args = args[2:]
        if extra_args and expr_type is exp.JSONExtract:
            node.set("expressions", extra_args)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a modulo expression from a two-argument MOD(...) call."""
    dividend = seq_get(args, 0)
    divisor = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    if isinstance(dividend, exp.Binary):
        dividend = exp.Paren(this=dividend)
    if isinstance(divisor, exp.Binary):
        divisor = exp.Paren(this=divisor)

    return exp.Mod(this=dividend, expression=divisor)
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-constructor expression of type `exp_class`.

    For dialects that distinguish ARRAY[...] from ARRAY(...), record which
    bracket notation was used on exp.Array nodes.
    """
    node = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build a ConvertTimezone from CONVERT_TIMEZONE(...) arguments.

    With exactly two arguments they are (target_tz, timestamp) and the
    source timezone falls back to `default_source_tz`; otherwise defer to
    the expression's standard arg-list constructor.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 TokenType.CURRENT_USER: exp.CurrentUser, 258 
} 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 TokenType.UNION, 265 } 266 267 NESTED_TYPE_TOKENS = { 268 TokenType.ARRAY, 269 TokenType.LIST, 270 TokenType.LOWCARDINALITY, 271 TokenType.MAP, 272 TokenType.NULLABLE, 273 TokenType.RANGE, 274 *STRUCT_TYPE_TOKENS, 275 } 276 277 ENUM_TYPE_TOKENS = { 278 TokenType.ENUM, 279 TokenType.ENUM8, 280 TokenType.ENUM16, 281 } 282 283 AGGREGATE_TYPE_TOKENS = { 284 TokenType.AGGREGATEFUNCTION, 285 TokenType.SIMPLEAGGREGATEFUNCTION, 286 } 287 288 TYPE_TOKENS = { 289 TokenType.BIT, 290 TokenType.BOOLEAN, 291 TokenType.TINYINT, 292 TokenType.UTINYINT, 293 TokenType.SMALLINT, 294 TokenType.USMALLINT, 295 TokenType.INT, 296 TokenType.UINT, 297 TokenType.BIGINT, 298 TokenType.UBIGINT, 299 TokenType.INT128, 300 TokenType.UINT128, 301 TokenType.INT256, 302 TokenType.UINT256, 303 TokenType.MEDIUMINT, 304 TokenType.UMEDIUMINT, 305 TokenType.FIXEDSTRING, 306 TokenType.FLOAT, 307 TokenType.DOUBLE, 308 TokenType.CHAR, 309 TokenType.NCHAR, 310 TokenType.VARCHAR, 311 TokenType.NVARCHAR, 312 TokenType.BPCHAR, 313 TokenType.TEXT, 314 TokenType.MEDIUMTEXT, 315 TokenType.LONGTEXT, 316 TokenType.MEDIUMBLOB, 317 TokenType.LONGBLOB, 318 TokenType.BINARY, 319 TokenType.VARBINARY, 320 TokenType.JSON, 321 TokenType.JSONB, 322 TokenType.INTERVAL, 323 TokenType.TINYBLOB, 324 TokenType.TINYTEXT, 325 TokenType.TIME, 326 TokenType.TIMETZ, 327 TokenType.TIMESTAMP, 328 TokenType.TIMESTAMP_S, 329 TokenType.TIMESTAMP_MS, 330 TokenType.TIMESTAMP_NS, 331 TokenType.TIMESTAMPTZ, 332 TokenType.TIMESTAMPLTZ, 333 TokenType.TIMESTAMPNTZ, 334 TokenType.DATETIME, 335 TokenType.DATETIME64, 336 TokenType.DATE, 337 TokenType.DATE32, 338 TokenType.INT4RANGE, 339 TokenType.INT4MULTIRANGE, 340 TokenType.INT8RANGE, 341 TokenType.INT8MULTIRANGE, 342 TokenType.NUMRANGE, 343 TokenType.NUMMULTIRANGE, 344 TokenType.TSRANGE, 345 TokenType.TSMULTIRANGE, 346 TokenType.TSTZRANGE, 347 TokenType.TSTZMULTIRANGE, 348 TokenType.DATERANGE, 
349 TokenType.DATEMULTIRANGE, 350 TokenType.DECIMAL, 351 TokenType.DECIMAL32, 352 TokenType.DECIMAL64, 353 TokenType.DECIMAL128, 354 TokenType.UDECIMAL, 355 TokenType.BIGDECIMAL, 356 TokenType.UUID, 357 TokenType.GEOGRAPHY, 358 TokenType.GEOMETRY, 359 TokenType.HLLSKETCH, 360 TokenType.HSTORE, 361 TokenType.PSEUDO_TYPE, 362 TokenType.SUPER, 363 TokenType.SERIAL, 364 TokenType.SMALLSERIAL, 365 TokenType.BIGSERIAL, 366 TokenType.XML, 367 TokenType.YEAR, 368 TokenType.UNIQUEIDENTIFIER, 369 TokenType.USERDEFINED, 370 TokenType.MONEY, 371 TokenType.SMALLMONEY, 372 TokenType.ROWVERSION, 373 TokenType.IMAGE, 374 TokenType.VARIANT, 375 TokenType.VECTOR, 376 TokenType.OBJECT, 377 TokenType.OBJECT_IDENTIFIER, 378 TokenType.INET, 379 TokenType.IPADDRESS, 380 TokenType.IPPREFIX, 381 TokenType.IPV4, 382 TokenType.IPV6, 383 TokenType.UNKNOWN, 384 TokenType.NULL, 385 TokenType.NAME, 386 TokenType.TDIGEST, 387 *ENUM_TYPE_TOKENS, 388 *NESTED_TYPE_TOKENS, 389 *AGGREGATE_TYPE_TOKENS, 390 } 391 392 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 393 TokenType.BIGINT: TokenType.UBIGINT, 394 TokenType.INT: TokenType.UINT, 395 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 396 TokenType.SMALLINT: TokenType.USMALLINT, 397 TokenType.TINYINT: TokenType.UTINYINT, 398 TokenType.DECIMAL: TokenType.UDECIMAL, 399 } 400 401 SUBQUERY_PREDICATES = { 402 TokenType.ANY: exp.Any, 403 TokenType.ALL: exp.All, 404 TokenType.EXISTS: exp.Exists, 405 TokenType.SOME: exp.Any, 406 } 407 408 RESERVED_TOKENS = { 409 *Tokenizer.SINGLE_TOKENS.values(), 410 TokenType.SELECT, 411 } - {TokenType.IDENTIFIER} 412 413 DB_CREATABLES = { 414 TokenType.DATABASE, 415 TokenType.DICTIONARY, 416 TokenType.MODEL, 417 TokenType.SCHEMA, 418 TokenType.SEQUENCE, 419 TokenType.STORAGE_INTEGRATION, 420 TokenType.TABLE, 421 TokenType.TAG, 422 TokenType.VIEW, 423 TokenType.WAREHOUSE, 424 TokenType.STREAMLIT, 425 } 426 427 CREATABLES = { 428 TokenType.COLUMN, 429 TokenType.CONSTRAINT, 430 TokenType.FOREIGN_KEY, 431 TokenType.FUNCTION, 432 
TokenType.INDEX, 433 TokenType.PROCEDURE, 434 *DB_CREATABLES, 435 } 436 437 ALTERABLES = { 438 TokenType.INDEX, 439 TokenType.TABLE, 440 TokenType.VIEW, 441 } 442 443 # Tokens that can represent identifiers 444 ID_VAR_TOKENS = { 445 TokenType.ALL, 446 TokenType.VAR, 447 TokenType.ANTI, 448 TokenType.APPLY, 449 TokenType.ASC, 450 TokenType.ASOF, 451 TokenType.AUTO_INCREMENT, 452 TokenType.BEGIN, 453 TokenType.BPCHAR, 454 TokenType.CACHE, 455 TokenType.CASE, 456 TokenType.COLLATE, 457 TokenType.COMMAND, 458 TokenType.COMMENT, 459 TokenType.COMMIT, 460 TokenType.CONSTRAINT, 461 TokenType.COPY, 462 TokenType.CUBE, 463 TokenType.DEFAULT, 464 TokenType.DELETE, 465 TokenType.DESC, 466 TokenType.DESCRIBE, 467 TokenType.DICTIONARY, 468 TokenType.DIV, 469 TokenType.END, 470 TokenType.EXECUTE, 471 TokenType.ESCAPE, 472 TokenType.FALSE, 473 TokenType.FIRST, 474 TokenType.FILTER, 475 TokenType.FINAL, 476 TokenType.FORMAT, 477 TokenType.FULL, 478 TokenType.IDENTIFIER, 479 TokenType.IS, 480 TokenType.ISNULL, 481 TokenType.INTERVAL, 482 TokenType.KEEP, 483 TokenType.KILL, 484 TokenType.LEFT, 485 TokenType.LOAD, 486 TokenType.MERGE, 487 TokenType.NATURAL, 488 TokenType.NEXT, 489 TokenType.OFFSET, 490 TokenType.OPERATOR, 491 TokenType.ORDINALITY, 492 TokenType.OVERLAPS, 493 TokenType.OVERWRITE, 494 TokenType.PARTITION, 495 TokenType.PERCENT, 496 TokenType.PIVOT, 497 TokenType.PRAGMA, 498 TokenType.RANGE, 499 TokenType.RECURSIVE, 500 TokenType.REFERENCES, 501 TokenType.REFRESH, 502 TokenType.RENAME, 503 TokenType.REPLACE, 504 TokenType.RIGHT, 505 TokenType.ROLLUP, 506 TokenType.ROW, 507 TokenType.ROWS, 508 TokenType.SEMI, 509 TokenType.SET, 510 TokenType.SETTINGS, 511 TokenType.SHOW, 512 TokenType.TEMPORARY, 513 TokenType.TOP, 514 TokenType.TRUE, 515 TokenType.TRUNCATE, 516 TokenType.UNIQUE, 517 TokenType.UNNEST, 518 TokenType.UNPIVOT, 519 TokenType.UPDATE, 520 TokenType.USE, 521 TokenType.VOLATILE, 522 TokenType.WINDOW, 523 *CREATABLES, 524 *SUBQUERY_PREDICATES, 525 *TYPE_TOKENS, 
526 *NO_PAREN_FUNCTIONS, 527 } 528 ID_VAR_TOKENS.remove(TokenType.UNION) 529 530 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 531 532 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 533 TokenType.ANTI, 534 TokenType.APPLY, 535 TokenType.ASOF, 536 TokenType.FULL, 537 TokenType.LEFT, 538 TokenType.LOCK, 539 TokenType.NATURAL, 540 TokenType.OFFSET, 541 TokenType.RIGHT, 542 TokenType.SEMI, 543 TokenType.WINDOW, 544 } 545 546 ALIAS_TOKENS = ID_VAR_TOKENS 547 548 ARRAY_CONSTRUCTORS = { 549 "ARRAY": exp.Array, 550 "LIST": exp.List, 551 } 552 553 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 554 555 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 556 557 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 558 559 FUNC_TOKENS = { 560 TokenType.COLLATE, 561 TokenType.COMMAND, 562 TokenType.CURRENT_DATE, 563 TokenType.CURRENT_DATETIME, 564 TokenType.CURRENT_TIMESTAMP, 565 TokenType.CURRENT_TIME, 566 TokenType.CURRENT_USER, 567 TokenType.FILTER, 568 TokenType.FIRST, 569 TokenType.FORMAT, 570 TokenType.GLOB, 571 TokenType.IDENTIFIER, 572 TokenType.INDEX, 573 TokenType.ISNULL, 574 TokenType.ILIKE, 575 TokenType.INSERT, 576 TokenType.LIKE, 577 TokenType.MERGE, 578 TokenType.OFFSET, 579 TokenType.PRIMARY_KEY, 580 TokenType.RANGE, 581 TokenType.REPLACE, 582 TokenType.RLIKE, 583 TokenType.ROW, 584 TokenType.UNNEST, 585 TokenType.VAR, 586 TokenType.LEFT, 587 TokenType.RIGHT, 588 TokenType.SEQUENCE, 589 TokenType.DATE, 590 TokenType.DATETIME, 591 TokenType.TABLE, 592 TokenType.TIMESTAMP, 593 TokenType.TIMESTAMPTZ, 594 TokenType.TRUNCATE, 595 TokenType.WINDOW, 596 TokenType.XOR, 597 *TYPE_TOKENS, 598 *SUBQUERY_PREDICATES, 599 } 600 601 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 602 TokenType.AND: exp.And, 603 } 604 605 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 606 TokenType.COLON_EQ: exp.PropertyEQ, 607 } 608 609 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 610 TokenType.OR: exp.Or, 611 } 612 613 EQUALITY = { 614 
TokenType.EQ: exp.EQ, 615 TokenType.NEQ: exp.NEQ, 616 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 617 } 618 619 COMPARISON = { 620 TokenType.GT: exp.GT, 621 TokenType.GTE: exp.GTE, 622 TokenType.LT: exp.LT, 623 TokenType.LTE: exp.LTE, 624 } 625 626 BITWISE = { 627 TokenType.AMP: exp.BitwiseAnd, 628 TokenType.CARET: exp.BitwiseXor, 629 TokenType.PIPE: exp.BitwiseOr, 630 } 631 632 TERM = { 633 TokenType.DASH: exp.Sub, 634 TokenType.PLUS: exp.Add, 635 TokenType.MOD: exp.Mod, 636 TokenType.COLLATE: exp.Collate, 637 } 638 639 FACTOR = { 640 TokenType.DIV: exp.IntDiv, 641 TokenType.LR_ARROW: exp.Distance, 642 TokenType.SLASH: exp.Div, 643 TokenType.STAR: exp.Mul, 644 } 645 646 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 647 648 TIMES = { 649 TokenType.TIME, 650 TokenType.TIMETZ, 651 } 652 653 TIMESTAMPS = { 654 TokenType.TIMESTAMP, 655 TokenType.TIMESTAMPTZ, 656 TokenType.TIMESTAMPLTZ, 657 *TIMES, 658 } 659 660 SET_OPERATIONS = { 661 TokenType.UNION, 662 TokenType.INTERSECT, 663 TokenType.EXCEPT, 664 } 665 666 JOIN_METHODS = { 667 TokenType.ASOF, 668 TokenType.NATURAL, 669 TokenType.POSITIONAL, 670 } 671 672 JOIN_SIDES = { 673 TokenType.LEFT, 674 TokenType.RIGHT, 675 TokenType.FULL, 676 } 677 678 JOIN_KINDS = { 679 TokenType.ANTI, 680 TokenType.CROSS, 681 TokenType.INNER, 682 TokenType.OUTER, 683 TokenType.SEMI, 684 TokenType.STRAIGHT_JOIN, 685 } 686 687 JOIN_HINTS: t.Set[str] = set() 688 689 LAMBDAS = { 690 TokenType.ARROW: lambda self, expressions: self.expression( 691 exp.Lambda, 692 this=self._replace_lambda( 693 self._parse_assignment(), 694 expressions, 695 ), 696 expressions=expressions, 697 ), 698 TokenType.FARROW: lambda self, expressions: self.expression( 699 exp.Kwarg, 700 this=exp.var(expressions[0].name), 701 expression=self._parse_assignment(), 702 ), 703 } 704 705 COLUMN_OPERATORS = { 706 TokenType.DOT: None, 707 TokenType.DCOLON: lambda self, this, to: self.expression( 708 exp.Cast if self.STRICT_CAST else exp.TryCast, 709 this=this, 710 to=to, 
711 ), 712 TokenType.ARROW: lambda self, this, path: self.expression( 713 exp.JSONExtract, 714 this=this, 715 expression=self.dialect.to_json_path(path), 716 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 717 ), 718 TokenType.DARROW: lambda self, this, path: self.expression( 719 exp.JSONExtractScalar, 720 this=this, 721 expression=self.dialect.to_json_path(path), 722 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 723 ), 724 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 725 exp.JSONBExtract, 726 this=this, 727 expression=path, 728 ), 729 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 730 exp.JSONBExtractScalar, 731 this=this, 732 expression=path, 733 ), 734 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 735 exp.JSONBContains, 736 this=this, 737 expression=key, 738 ), 739 } 740 741 EXPRESSION_PARSERS = { 742 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 743 exp.Column: lambda self: self._parse_column(), 744 exp.Condition: lambda self: self._parse_assignment(), 745 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 746 exp.Expression: lambda self: self._parse_expression(), 747 exp.From: lambda self: self._parse_from(joins=True), 748 exp.Group: lambda self: self._parse_group(), 749 exp.Having: lambda self: self._parse_having(), 750 exp.Identifier: lambda self: self._parse_id_var(), 751 exp.Join: lambda self: self._parse_join(), 752 exp.Lambda: lambda self: self._parse_lambda(), 753 exp.Lateral: lambda self: self._parse_lateral(), 754 exp.Limit: lambda self: self._parse_limit(), 755 exp.Offset: lambda self: self._parse_offset(), 756 exp.Order: lambda self: self._parse_order(), 757 exp.Ordered: lambda self: self._parse_ordered(), 758 exp.Properties: lambda self: self._parse_properties(), 759 exp.Qualify: lambda self: self._parse_qualify(), 760 exp.Returning: lambda self: self._parse_returning(), 761 exp.Select: lambda self: 
self._parse_select(), 762 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 763 exp.Table: lambda self: self._parse_table_parts(), 764 exp.TableAlias: lambda self: self._parse_table_alias(), 765 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 766 exp.Where: lambda self: self._parse_where(), 767 exp.Window: lambda self: self._parse_named_window(), 768 exp.With: lambda self: self._parse_with(), 769 "JOIN_TYPE": lambda self: self._parse_join_parts(), 770 } 771 772 STATEMENT_PARSERS = { 773 TokenType.ALTER: lambda self: self._parse_alter(), 774 TokenType.BEGIN: lambda self: self._parse_transaction(), 775 TokenType.CACHE: lambda self: self._parse_cache(), 776 TokenType.COMMENT: lambda self: self._parse_comment(), 777 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 778 TokenType.COPY: lambda self: self._parse_copy(), 779 TokenType.CREATE: lambda self: self._parse_create(), 780 TokenType.DELETE: lambda self: self._parse_delete(), 781 TokenType.DESC: lambda self: self._parse_describe(), 782 TokenType.DESCRIBE: lambda self: self._parse_describe(), 783 TokenType.DROP: lambda self: self._parse_drop(), 784 TokenType.GRANT: lambda self: self._parse_grant(), 785 TokenType.INSERT: lambda self: self._parse_insert(), 786 TokenType.KILL: lambda self: self._parse_kill(), 787 TokenType.LOAD: lambda self: self._parse_load(), 788 TokenType.MERGE: lambda self: self._parse_merge(), 789 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 790 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 791 TokenType.REFRESH: lambda self: self._parse_refresh(), 792 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 793 TokenType.SET: lambda self: self._parse_set(), 794 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 795 TokenType.UNCACHE: lambda self: self._parse_uncache(), 796 TokenType.UPDATE: lambda self: self._parse_update(), 797 TokenType.USE: lambda self: 
self.expression( 798 exp.Use, 799 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 800 this=self._parse_table(schema=False), 801 ), 802 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 803 } 804 805 UNARY_PARSERS = { 806 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 807 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 808 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 809 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 810 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 811 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 812 } 813 814 STRING_PARSERS = { 815 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 816 exp.RawString, this=token.text 817 ), 818 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 819 exp.National, this=token.text 820 ), 821 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 822 TokenType.STRING: lambda self, token: self.expression( 823 exp.Literal, this=token.text, is_string=True 824 ), 825 TokenType.UNICODE_STRING: lambda self, token: self.expression( 826 exp.UnicodeString, 827 this=token.text, 828 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 829 ), 830 } 831 832 NUMERIC_PARSERS = { 833 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 834 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 835 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 836 TokenType.NUMBER: lambda self, token: self.expression( 837 exp.Literal, this=token.text, is_string=False 838 ), 839 } 840 841 PRIMARY_PARSERS = { 842 **STRING_PARSERS, 843 **NUMERIC_PARSERS, 844 TokenType.INTRODUCER: lambda self, 
token: self._parse_introducer(token), 845 TokenType.NULL: lambda self, _: self.expression(exp.Null), 846 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 847 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 848 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 849 TokenType.STAR: lambda self, _: self._parse_star_ops(), 850 } 851 852 PLACEHOLDER_PARSERS = { 853 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 854 TokenType.PARAMETER: lambda self: self._parse_parameter(), 855 TokenType.COLON: lambda self: ( 856 self.expression(exp.Placeholder, this=self._prev.text) 857 if self._match_set(self.ID_VAR_TOKENS) 858 else None 859 ), 860 } 861 862 RANGE_PARSERS = { 863 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 864 TokenType.GLOB: binary_range_parser(exp.Glob), 865 TokenType.ILIKE: binary_range_parser(exp.ILike), 866 TokenType.IN: lambda self, this: self._parse_in(this), 867 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 868 TokenType.IS: lambda self, this: self._parse_is(this), 869 TokenType.LIKE: binary_range_parser(exp.Like), 870 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 871 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 872 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 873 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 874 } 875 876 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 877 "ALLOWED_VALUES": lambda self: self.expression( 878 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 879 ), 880 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 881 "AUTO": lambda self: self._parse_auto_property(), 882 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 883 "BACKUP": lambda self: self.expression( 884 exp.BackupProperty, this=self._parse_var(any_token=True) 885 ), 886 "BLOCKCOMPRESSION": lambda self: 
self._parse_blockcompression(), 887 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 888 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 889 "CHECKSUM": lambda self: self._parse_checksum(), 890 "CLUSTER BY": lambda self: self._parse_cluster(), 891 "CLUSTERED": lambda self: self._parse_clustered_by(), 892 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 893 exp.CollateProperty, **kwargs 894 ), 895 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 896 "CONTAINS": lambda self: self._parse_contains_property(), 897 "COPY": lambda self: self._parse_copy_property(), 898 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 899 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 900 "DEFINER": lambda self: self._parse_definer(), 901 "DETERMINISTIC": lambda self: self.expression( 902 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 903 ), 904 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 905 "DUPLICATE": lambda self: self._parse_duplicate(), 906 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 907 "DISTKEY": lambda self: self._parse_distkey(), 908 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 909 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 910 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 911 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 912 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 913 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 914 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 915 "FREESPACE": lambda self: self._parse_freespace(), 916 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 917 "HEAP": lambda self: self.expression(exp.HeapProperty), 918 "ICEBERG": lambda self: 
self.expression(exp.IcebergProperty), 919 "IMMUTABLE": lambda self: self.expression( 920 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 921 ), 922 "INHERITS": lambda self: self.expression( 923 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 924 ), 925 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 926 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 927 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 928 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 929 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 930 "LIKE": lambda self: self._parse_create_like(), 931 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 932 "LOCK": lambda self: self._parse_locking(), 933 "LOCKING": lambda self: self._parse_locking(), 934 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 935 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 936 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 937 "MODIFIES": lambda self: self._parse_modifies_property(), 938 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 939 "NO": lambda self: self._parse_no_property(), 940 "ON": lambda self: self._parse_on_property(), 941 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 942 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 943 "PARTITION": lambda self: self._parse_partitioned_of(), 944 "PARTITION BY": lambda self: self._parse_partitioned_by(), 945 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 946 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 947 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 948 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 949 "READS": lambda self: self._parse_reads_property(), 950 "REMOTE": 
lambda self: self._parse_remote_with_connection(), 951 "RETURNS": lambda self: self._parse_returns(), 952 "STRICT": lambda self: self.expression(exp.StrictProperty), 953 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 954 "ROW": lambda self: self._parse_row(), 955 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 956 "SAMPLE": lambda self: self.expression( 957 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 958 ), 959 "SECURE": lambda self: self.expression(exp.SecureProperty), 960 "SECURITY": lambda self: self._parse_security(), 961 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 962 "SETTINGS": lambda self: self._parse_settings_property(), 963 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 964 "SORTKEY": lambda self: self._parse_sortkey(), 965 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 966 "STABLE": lambda self: self.expression( 967 exp.StabilityProperty, this=exp.Literal.string("STABLE") 968 ), 969 "STORED": lambda self: self._parse_stored(), 970 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 971 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 972 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 973 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 974 "TO": lambda self: self._parse_to_table(), 975 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 976 "TRANSFORM": lambda self: self.expression( 977 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 978 ), 979 "TTL": lambda self: self._parse_ttl(), 980 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 981 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 982 "VOLATILE": lambda self: self._parse_volatile_property(), 983 "WITH": lambda self: self._parse_with_property(), 984 } 985 986 
CONSTRAINT_PARSERS = { 987 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 988 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 989 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 990 "CHARACTER SET": lambda self: self.expression( 991 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 992 ), 993 "CHECK": lambda self: self.expression( 994 exp.CheckColumnConstraint, 995 this=self._parse_wrapped(self._parse_assignment), 996 enforced=self._match_text_seq("ENFORCED"), 997 ), 998 "COLLATE": lambda self: self.expression( 999 exp.CollateColumnConstraint, 1000 this=self._parse_identifier() or self._parse_column(), 1001 ), 1002 "COMMENT": lambda self: self.expression( 1003 exp.CommentColumnConstraint, this=self._parse_string() 1004 ), 1005 "COMPRESS": lambda self: self._parse_compress(), 1006 "CLUSTERED": lambda self: self.expression( 1007 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1008 ), 1009 "NONCLUSTERED": lambda self: self.expression( 1010 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1011 ), 1012 "DEFAULT": lambda self: self.expression( 1013 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1014 ), 1015 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1016 "EPHEMERAL": lambda self: self.expression( 1017 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1018 ), 1019 "EXCLUDE": lambda self: self.expression( 1020 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1021 ), 1022 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1023 "FORMAT": lambda self: self.expression( 1024 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1025 ), 1026 "GENERATED": lambda self: self._parse_generated_as_identity(), 1027 "IDENTITY": lambda self: self._parse_auto_increment(), 1028 "INLINE": lambda self: self._parse_inline(), 1029 "LIKE": lambda 
self: self._parse_create_like(), 1030 "NOT": lambda self: self._parse_not_constraint(), 1031 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1032 "ON": lambda self: ( 1033 self._match(TokenType.UPDATE) 1034 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1035 ) 1036 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1037 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1038 "PERIOD": lambda self: self._parse_period_for_system_time(), 1039 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1040 "REFERENCES": lambda self: self._parse_references(match=False), 1041 "TITLE": lambda self: self.expression( 1042 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1043 ), 1044 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1045 "UNIQUE": lambda self: self._parse_unique(), 1046 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1047 "WITH": lambda self: self.expression( 1048 exp.Properties, expressions=self._parse_wrapped_properties() 1049 ), 1050 } 1051 1052 ALTER_PARSERS = { 1053 "ADD": lambda self: self._parse_alter_table_add(), 1054 "ALTER": lambda self: self._parse_alter_table_alter(), 1055 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1056 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1057 "DROP": lambda self: self._parse_alter_table_drop(), 1058 "RENAME": lambda self: self._parse_alter_table_rename(), 1059 "SET": lambda self: self._parse_alter_table_set(), 1060 "AS": lambda self: self._parse_select(), 1061 } 1062 1063 ALTER_ALTER_PARSERS = { 1064 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1065 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1066 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1067 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1068 } 1069 1070 
# Constraint keywords that may appear in a schema definition without a
# preceding CONSTRAINT <name> clause.
SCHEMA_UNNAMED_CONSTRAINTS = {
    "CHECK",
    "EXCLUDE",
    "FOREIGN KEY",
    "LIKE",
    "PERIOD",
    "PRIMARY KEY",
    "UNIQUE",
}

# Functions that are parsed specially and are NOT followed by parentheses,
# keyed by the (uppercase) function name.
NO_PAREN_FUNCTION_PARSERS = {
    "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    "CASE": lambda self: self._parse_case(),
    "CONNECT_BY_ROOT": lambda self: self.expression(
        exp.ConnectByRoot, this=self._parse_column()
    ),
    "IF": lambda self: self._parse_if(),
    "NEXT": lambda self: self._parse_next_value_for(),
}

# Tokens that cannot serve as a function name (e.g. quoted identifiers, strings).
INVALID_FUNC_NAME_TOKENS = {
    TokenType.IDENTIFIER,
    TokenType.STRING,
}

# Functions whose arguments may carry aliases (e.g. STRUCT(x AS y)).
FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

# Expression types that represent a key/value pair.
KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

# Functions with dedicated parsing logic (non-standard argument syntax),
# keyed by the (uppercase) function name.
FUNCTION_PARSERS = {
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "GAP_FILL": lambda self: self._parse_gap_fill(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
    "JSON_TABLE": lambda self: self._parse_json_table(),
    "MATCH": lambda self: self._parse_match_against(),
    "NORMALIZE": lambda self: self._parse_normalize(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "POSITION": lambda self: self._parse_position(),
    "PREDICT": lambda self: self._parse_predict(),
    "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
    "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
}

# Query-modifier clauses: each entry maps a starting token to a callable that
# returns a ("modifier_key", parsed_expression) pair. Note FETCH and LIMIT both
# populate the "limit" slot, and FOR/LOCK both populate "locks".
QUERY_MODIFIER_PARSERS = {
    TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
    TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
    TokenType.WHERE: lambda self: ("where", self._parse_where()),
    TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
    TokenType.HAVING: lambda self: ("having", self._parse_having()),
    TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
    TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
    TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
    TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
    TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
    TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
    TokenType.FOR: lambda self: ("locks", self._parse_locks()),
    TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
    TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.CLUSTER_BY: lambda self: (
        "cluster",
        self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    ),
    TokenType.DISTRIBUTE_BY: lambda self: (
        "distribute",
        self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
    ),
    TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
    TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}

# SET statement sub-parsers, keyed by the (uppercase) modifier keyword.
SET_PARSERS = {
    "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
    "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
    "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
    "TRANSACTION": lambda self: self._parse_set_transaction(),
}

# SHOW statement sub-parsers; empty here, populated by dialects.
SHOW_PARSERS: t.Dict[str, t.Callable] = {}

TYPE_LITERAL_PARSERS =
{ 1160 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1161 } 1162 1163 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1164 1165 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1166 1167 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1168 1169 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1170 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1171 "ISOLATION": ( 1172 ("LEVEL", "REPEATABLE", "READ"), 1173 ("LEVEL", "READ", "COMMITTED"), 1174 ("LEVEL", "READ", "UNCOMITTED"), 1175 ("LEVEL", "SERIALIZABLE"), 1176 ), 1177 "READ": ("WRITE", "ONLY"), 1178 } 1179 1180 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1181 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1182 ) 1183 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1184 1185 CREATE_SEQUENCE: OPTIONS_TYPE = { 1186 "SCALE": ("EXTEND", "NOEXTEND"), 1187 "SHARD": ("EXTEND", "NOEXTEND"), 1188 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1189 **dict.fromkeys( 1190 ( 1191 "SESSION", 1192 "GLOBAL", 1193 "KEEP", 1194 "NOKEEP", 1195 "ORDER", 1196 "NOORDER", 1197 "NOCACHE", 1198 "CYCLE", 1199 "NOCYCLE", 1200 "NOMINVALUE", 1201 "NOMAXVALUE", 1202 "NOSCALE", 1203 "NOSHARD", 1204 ), 1205 tuple(), 1206 ), 1207 } 1208 1209 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1210 1211 USABLES: OPTIONS_TYPE = dict.fromkeys( 1212 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1213 ) 1214 1215 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1216 1217 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1218 "TYPE": ("EVOLUTION",), 1219 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1220 } 1221 1222 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1223 "NOT": ("ENFORCED",), 1224 "MATCH": ( 1225 "FULL", 1226 "PARTIAL", 1227 "SIMPLE", 1228 ), 1229 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1230 
    **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
}

# ON CONFLICT alternatives for INSERT (INSERT OR ABORT, OR FAIL, ...).
INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

CLONE_KEYWORDS = {"CLONE", "COPY"}
HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

# Tokens usable as a window alias; ROWS is excluded since it starts a frame spec.
WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

# Tokens usable as identifiers inside FETCH; ROW/ROWS/PERCENT are FETCH keywords.
FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

ADD_CONSTRAINT_TOKENS = {
    TokenType.CONSTRAINT,
    TokenType.FOREIGN_KEY,
    TokenType.INDEX,
    TokenType.KEY,
    TokenType.PRIMARY_KEY,
    TokenType.UNIQUE,
}

DISTINCT_TOKENS = {TokenType.DISTINCT}

NULL_TOKENS = {TokenType.NULL}

UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

# ODBC datetime escape literals: {d '...'}, {t '...'}, {ts '...'}.
ODBC_DATETIME_LITERALS = {
    "d": exp.Date,
    "t": exp.Time,
    "ts": exp.Timestamp,
}

ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

# Whether :: casts are strict (exp.Cast) or lenient (exp.TryCast).
STRICT_CAST = True

PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False

LOG_DEFAULTS_TO_LN = False

# Whether ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

# Whether the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False

# The default method used for table sampling
DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

# Whether the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER = True

# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST = False

# Whether string aliases are supported `SELECT COUNT(*) 'count'`
STRING_ALIASES = False

# Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
MODIFIERS_ATTACHED_TO_SET_OP = True
SET_OP_MODIFIERS = {"order", "limit", "offset"}

# Whether to parse IF statements that aren't followed by a left parenthesis as commands
NO_PAREN_IF_COMMANDS = True

# Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
JSON_ARROWS_REQUIRE_JSON_TYPE = False

# Whether the `:` operator is used to extract a value from a VARIANT column
COLON_IS_VARIANT_EXTRACT = False

# Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
# If this is True and '(' is not found, the keyword will be treated as an identifier
VALUES_FOLLOWED_BY_PAREN = True

# Whether implicit unnesting is supported, e.g.
SELECT 1 FROM y.z AS z, z.a (Redshift) 1329 SUPPORTS_IMPLICIT_UNNEST = False 1330 1331 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1332 INTERVAL_SPANS = True 1333 1334 # Whether a PARTITION clause can follow a table reference 1335 SUPPORTS_PARTITION_SELECTION = False 1336 1337 __slots__ = ( 1338 "error_level", 1339 "error_message_context", 1340 "max_errors", 1341 "dialect", 1342 "sql", 1343 "errors", 1344 "_tokens", 1345 "_index", 1346 "_curr", 1347 "_next", 1348 "_prev", 1349 "_prev_comments", 1350 ) 1351 1352 # Autofilled 1353 SHOW_TRIE: t.Dict = {} 1354 SET_TRIE: t.Dict = {} 1355 1356 def __init__( 1357 self, 1358 error_level: t.Optional[ErrorLevel] = None, 1359 error_message_context: int = 100, 1360 max_errors: int = 3, 1361 dialect: DialectType = None, 1362 ): 1363 from sqlglot.dialects import Dialect 1364 1365 self.error_level = error_level or ErrorLevel.IMMEDIATE 1366 self.error_message_context = error_message_context 1367 self.max_errors = max_errors 1368 self.dialect = Dialect.get_or_raise(dialect) 1369 self.reset() 1370 1371 def reset(self): 1372 self.sql = "" 1373 self.errors = [] 1374 self._tokens = [] 1375 self._index = 0 1376 self._curr = None 1377 self._next = None 1378 self._prev = None 1379 self._prev_comments = None 1380 1381 def parse( 1382 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1383 ) -> t.List[t.Optional[exp.Expression]]: 1384 """ 1385 Parses a list of tokens and returns a list of syntax trees, one tree 1386 per parsed SQL statement. 1387 1388 Args: 1389 raw_tokens: The list of tokens. 1390 sql: The original SQL string, used to produce helpful debug messages. 1391 1392 Returns: 1393 The list of the produced syntax trees. 
1394 """ 1395 return self._parse( 1396 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1397 ) 1398 1399 def parse_into( 1400 self, 1401 expression_types: exp.IntoType, 1402 raw_tokens: t.List[Token], 1403 sql: t.Optional[str] = None, 1404 ) -> t.List[t.Optional[exp.Expression]]: 1405 """ 1406 Parses a list of tokens into a given Expression type. If a collection of Expression 1407 types is given instead, this method will try to parse the token list into each one 1408 of them, stopping at the first for which the parsing succeeds. 1409 1410 Args: 1411 expression_types: The expression type(s) to try and parse the token list into. 1412 raw_tokens: The list of tokens. 1413 sql: The original SQL string, used to produce helpful debug messages. 1414 1415 Returns: 1416 The target Expression. 1417 """ 1418 errors = [] 1419 for expression_type in ensure_list(expression_types): 1420 parser = self.EXPRESSION_PARSERS.get(expression_type) 1421 if not parser: 1422 raise TypeError(f"No parser registered for {expression_type}") 1423 1424 try: 1425 return self._parse(parser, raw_tokens, sql) 1426 except ParseError as e: 1427 e.errors[0]["into_expression"] = expression_type 1428 errors.append(e) 1429 1430 raise ParseError( 1431 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1432 errors=merge_errors(errors), 1433 ) from errors[-1] 1434 1435 def _parse( 1436 self, 1437 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1438 raw_tokens: t.List[Token], 1439 sql: t.Optional[str] = None, 1440 ) -> t.List[t.Optional[exp.Expression]]: 1441 self.reset() 1442 self.sql = sql or "" 1443 1444 total = len(raw_tokens) 1445 chunks: t.List[t.List[Token]] = [[]] 1446 1447 for i, token in enumerate(raw_tokens): 1448 if token.token_type == TokenType.SEMICOLON: 1449 if token.comments: 1450 chunks.append([token]) 1451 1452 if i < total - 1: 1453 chunks.append([]) 1454 else: 1455 chunks[-1].append(token) 1456 1457 expressions = [] 1458 1459 for 
tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens the parse method didn't consume indicate a syntax problem
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m pair underlines the offending span in terminals
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, attach any pending comments from
        # the previously consumed token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves the buffered comments of the previous token onto the expression
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL spanning the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the source.
        # NOTE(review): returns a falsy non-bool (None/Token) when either token is
        # missing, despite the -> bool annotation — callers appear to rely only on
        # truthiness; confirm before tightening.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev token views
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        # Emits a warning when a statement is about to be parsed as an opaque Command
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks,
 and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback parser: wraps the remaining statement text in an opaque Command node
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so a failure raises here instead of being accumulated
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT ON <kind> <target> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque Command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the leading token,
        # else fall back to expression/select parsing.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return
 self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP <kind> [IF EXISTS] <name> [CASCADE|CONSTRAINTS|PURGE...]
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only if the full
        # sequence was consumed
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement for any creatable kind (table, view, function, index...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various syntactic locations
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Leftover tokens that aren't clause delimiters: bail to Command
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Parses CREATE SEQUENCE options (INCREMENT BY, MINVALUE, CACHE, ...);
        # returns None when nothing was consumed.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif
 self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier keywords that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parses a single table/view property, dispatching to PROPERTY_PARSERS
        # or falling back to a generic key = value Property.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED AS ... (Hive-style file format, incl. INPUTFORMAT/OUTPUTFORMAT)
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers in property values are treated as plain variables
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Parses [= | AS] <value> into the given property expression class
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties into a single exp.Properties node;
        # `before` selects the Teradata-style pre-name property grammar.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is a table property in some dialects and a function stability
        # marker in others; disambiguate by the token two places back.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit =
 self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        # Parses T-SQL SYSTEM_VERSIONING = ON [(...)] | OFF
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        # Parses DATA_DELETION = ON|OFF [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)]
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        # Parses DISTRIBUTED BY HASH(...)|RANDOM [BUCKETS n|AUTO] [ORDER BY ...]
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatches the many WITH <...> property variants
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # Parses CHECKSUM = ON|OFF|DEFAULT
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # Parses CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS (Hive)
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
sorted_by=sorted_by, 2267 buckets=buckets, 2268 ) 2269 2270 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2271 if not self._match_text_seq("GRANTS"): 2272 self._retreat(self._index - 1) 2273 return None 2274 2275 return self.expression(exp.CopyGrantsProperty) 2276 2277 def _parse_freespace(self) -> exp.FreespaceProperty: 2278 self._match(TokenType.EQ) 2279 return self.expression( 2280 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2281 ) 2282 2283 def _parse_mergeblockratio( 2284 self, no: bool = False, default: bool = False 2285 ) -> exp.MergeBlockRatioProperty: 2286 if self._match(TokenType.EQ): 2287 return self.expression( 2288 exp.MergeBlockRatioProperty, 2289 this=self._parse_number(), 2290 percent=self._match(TokenType.PERCENT), 2291 ) 2292 2293 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2294 2295 def _parse_datablocksize( 2296 self, 2297 default: t.Optional[bool] = None, 2298 minimum: t.Optional[bool] = None, 2299 maximum: t.Optional[bool] = None, 2300 ) -> exp.DataBlocksizeProperty: 2301 self._match(TokenType.EQ) 2302 size = self._parse_number() 2303 2304 units = None 2305 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2306 units = self._prev.text 2307 2308 return self.expression( 2309 exp.DataBlocksizeProperty, 2310 size=size, 2311 units=units, 2312 default=default, 2313 minimum=minimum, 2314 maximum=maximum, 2315 ) 2316 2317 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2318 self._match(TokenType.EQ) 2319 always = self._match_text_seq("ALWAYS") 2320 manual = self._match_text_seq("MANUAL") 2321 never = self._match_text_seq("NEVER") 2322 default = self._match_text_seq("DEFAULT") 2323 2324 autotemp = None 2325 if self._match_text_seq("AUTOTEMP"): 2326 autotemp = self._parse_schema() 2327 2328 return self.expression( 2329 exp.BlockCompressionProperty, 2330 always=always, 2331 manual=manual, 2332 never=never, 2333 default=default, 
            # (continuation) last keyword argument of the BlockCompressionProperty call
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse `[NO] [CONCURRENT] ISOLATED LOADING [<target>]`; rewind fully on no match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata-style LOCKING clause: kind, object, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks carry no object name; the other kinds are followed by one.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse an optional PARTITION BY list; empty list when the keyword is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not ordinary expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse `PARTITION OF <table> { DEFAULT | FOR VALUES <bound spec> }` (Postgres)."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this,
                               # (continuation) closes the PartitionedOfProperty call above
                               expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse `PARTITIONED BY = <schema or bracketed field>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse `WITH [NO] DATA [AND [NO] STATISTICS]`; `no` is supplied by the caller."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL` tail of `CONTAINS SQL`."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL DATA` tail of `MODIFIES SQL DATA`."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows NO: `PRIMARY INDEX` or `SQL`."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT PRESERVE/DELETE ROWS, else a generic ON property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL DATA` tail of `READS SQL DATA`."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        # (continuation) body of _parse_distkey: a single wrapped identifier, e.g. DISTKEY(col)
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <option>]*` for CREATE TABLE ... LIKE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse a SORTKEY column list; `compound` marks a preceding COMPOUND keyword."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `CHARACTER SET = <name>`; `default` marks a preceding DEFAULT keyword."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse `REMOTE WITH CONNECTION <table parts>` (BigQuery remote models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE(...), NULL ON NULL INPUT, or a plain type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed form: RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value,
                               # (continuation) closes the ReturnsProperty call above
                               is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE: optional creatable kind, style modifier, target table, partition."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot means the "style" word was really the first part of a qualified
            # table name — undo both matches and reparse it as a table.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse Oracle-style multitable INSERT (FIRST/ALL ... WHEN ... INTO ... SELECT)."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One `[WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...]` arm; None ends the loop.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (continues past the end of this chunk)."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        # (continuation) body of _parse_insert, whose header is above
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT FIRST/ALL switches to the multitable form.
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse `KILL [CONNECTION|QUERY] <id>` (MySQL)."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` (Postgres-style) or `ON DUPLICATE KEY ...` (MySQL-style)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a parenthesized key list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET a = 1, ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse `RETURNING <exprs> [INTO <target>]`."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the `FORMAT ...` tail of `ROW FORMAT ...` (ROW consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse `[WITH] SERDEPROPERTIES (...)`; rewind fully when the keyword is absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            # (continuation) keyword arguments of the SerdeProperties call opened above;
            # "with" is a Python keyword, hence the dict-splat form.
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive `ROW FORMAT SERDE '<class>'` or `ROW FORMAT DELIMITED ...` clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional and contributes one string argument.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive `LOAD DATA [LOCAL] INPATH ... INTO TABLE ...`; else a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and
                # (continuation) remaining arguments of the LoadData call opened above
                self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE: target table, SET assignments, then trailing clauses."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`; TABLE is mandatory."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE ... [OPTIONS(...)] [AS <select>]` (Spark)."""
        lazy = self._match_text_seq("LAZY")
        # (continuation) body of _parse_cache, whose header is above
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # OPTIONS('key' = 'value') — a single key/value pair is parsed here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION (<assignments>)`; None when PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one VALUES row: a parenthesized tuple, or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # (continuation) unparenthesized single-value row for _parse_value, see comment above
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, the SELECT core or one of the
        alternative forms (parenthesized subquery, VALUES, leading FROM, SUMMARIZE,
        DESCRIBE, STREAM), then trailing set operations."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # Guard: a following dot means ALL/DISTINCT would really be a column prefix.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse `WITH [RECURSIVE] <cte> [, <cte>]*` into an exp.With node."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated; a repeated WITH also continues the list.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `<alias> AS [NOT MATERIALIZED | MATERIALIZED] (<statement>)`."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] <alias> [(<columns>)]`; None when neither alias nor columns appear."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty parenthesis list means the paren wasn't a column list — rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from
        # the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery with optional pivots, alias, and sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite bare joined "tables" that actually reference a prior FROM/JOIN alias
        into explicit UNNEST(...) calls (used by dialects with implicit unnesting)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/ORDER/LIMIT, etc.)."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                # (continuation) collect LATERAL clauses for the enclosing for-loop above
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an embedded OFFSET; lift it to its own node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment body up to the closing `*/`."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>` (SELECT INTO)."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` when FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item with an optional FINAL/RUNNING window-frame keyword."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause: partition/order, MEASURES, rows-per-match,
        AFTER MATCH SKIP, PATTERN, DEFINE, and a trailing alias."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL text between balanced parens.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY; `cross_apply` is True for CROSS
        APPLY, False for OUTER APPLY, and None for plain LATERAL."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: an UNNEST call, a function, or a (possibly dotted) name.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the
lateral's child node to the lateral itself 3340 table_alias = this.args["alias"].pop() 3341 else: 3342 table_alias = self._parse_table_alias() 3343 3344 return self.expression( 3345 exp.Lateral, 3346 this=this, 3347 view=view, 3348 outer=outer, 3349 alias=table_alias, 3350 cross_apply=cross_apply, 3351 ) 3352 3353 def _parse_join_parts( 3354 self, 3355 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3356 return ( 3357 self._match_set(self.JOIN_METHODS) and self._prev, 3358 self._match_set(self.JOIN_SIDES) and self._prev, 3359 self._match_set(self.JOIN_KINDS) and self._prev, 3360 ) 3361 3362 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3363 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3364 this = self._parse_column() 3365 if isinstance(this, exp.Column): 3366 return this.this 3367 return this 3368 3369 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3370 3371 def _parse_join( 3372 self, skip_join_token: bool = False, parse_bracket: bool = False 3373 ) -> t.Optional[exp.Join]: 3374 if self._match(TokenType.COMMA): 3375 return self.expression(exp.Join, this=self._parse_table()) 3376 3377 index = self._index 3378 method, side, kind = self._parse_join_parts() 3379 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3380 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3381 3382 if not skip_join_token and not join: 3383 self._retreat(index) 3384 kind = None 3385 method = None 3386 side = None 3387 3388 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3389 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3390 3391 if not skip_join_token and not join and not outer_apply and not cross_apply: 3392 return None 3393 3394 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3395 3396 if method: 3397 kwargs["method"] = method.text 3398 if side: 3399 
kwargs["side"] = side.text 3400 if kind: 3401 kwargs["kind"] = kind.text 3402 if hint: 3403 kwargs["hint"] = hint 3404 3405 if self._match(TokenType.MATCH_CONDITION): 3406 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3407 3408 if self._match(TokenType.ON): 3409 kwargs["on"] = self._parse_assignment() 3410 elif self._match(TokenType.USING): 3411 kwargs["using"] = self._parse_using_identifiers() 3412 elif ( 3413 not (outer_apply or cross_apply) 3414 and not isinstance(kwargs["this"], exp.Unnest) 3415 and not (kind and kind.token_type == TokenType.CROSS) 3416 ): 3417 index = self._index 3418 joins: t.Optional[list] = list(self._parse_joins()) 3419 3420 if joins and self._match(TokenType.ON): 3421 kwargs["on"] = self._parse_assignment() 3422 elif joins and self._match(TokenType.USING): 3423 kwargs["using"] = self._parse_using_identifiers() 3424 else: 3425 joins = None 3426 self._retreat(index) 3427 3428 kwargs["this"].set("joins", joins if joins else None) 3429 3430 comments = [c for token in (method, side, kind) if token for c in token.comments] 3431 return self.expression(exp.Join, comments=comments, **kwargs) 3432 3433 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3434 this = self._parse_assignment() 3435 3436 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3437 return this 3438 3439 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3440 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3441 3442 return this 3443 3444 def _parse_index_params(self) -> exp.IndexParameters: 3445 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3446 3447 if self._match(TokenType.L_PAREN, advance=False): 3448 columns = self._parse_wrapped_csv(self._parse_with_operator) 3449 else: 3450 columns = None 3451 3452 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3453 partition_by = self._parse_partition_by() 3454 
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        If ``index`` is given (or ``anonymous`` is set) the index name was already
        consumed and only ON <table> plus parameters remain; otherwise the
        UNIQUE/PRIMARY/AMP prefix and INDEX keyword are parsed here.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints; None if absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference (function, id, string or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly catalog/db-qualified) table name into an exp.Table.

        Also attaches trailing CHANGES, AT/BEFORE time-travel and PIVOT clauses.
        Raises via ``raise_error`` when the expected table/database name is missing.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # Trailing wildcard glues onto the last identifier (e.g. BigQuery `tbl*`)
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a full table factor: lateral/unnest/values/subquery or a named table,
        together with version, sample, alias, hints, pivots and optional joins.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects put TABLESAMPLE after the alias, others before it
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal/versioned table clauses (FOR SYSTEM_TIME AS OF, BETWEEN ... AND, etc.)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
= self.expression( 3723 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3724 ) 3725 elif self._match(TokenType.ALL): 3726 kind = "ALL" 3727 expression = None 3728 else: 3729 self._match_text_seq("AS", "OF") 3730 kind = "AS OF" 3731 expression = self._parse_type() 3732 3733 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3734 3735 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3736 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3737 index = self._index 3738 historical_data = None 3739 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3740 this = self._prev.text.upper() 3741 kind = ( 3742 self._match(TokenType.L_PAREN) 3743 and self._match_texts(self.HISTORICAL_DATA_KIND) 3744 and self._prev.text.upper() 3745 ) 3746 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3747 3748 if expression: 3749 self._match_r_paren() 3750 historical_data = self.expression( 3751 exp.HistoricalData, this=this, kind=kind, expression=expression 3752 ) 3753 else: 3754 self._retreat(index) 3755 3756 return historical_data 3757 3758 def _parse_changes(self) -> t.Optional[exp.Changes]: 3759 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3760 return None 3761 3762 information = self._parse_var(any_token=True) 3763 self._match_r_paren() 3764 3765 return self.expression( 3766 exp.Changes, 3767 information=information, 3768 at_before=self._parse_historical_data(), 3769 end=self._parse_historical_data(), 3770 ) 3771 3772 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3773 if not self._match(TokenType.UNNEST): 3774 return None 3775 3776 expressions = self._parse_wrapped_csv(self._parse_equality) 3777 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3778 3779 alias = self._parse_table_alias() if with_alias else None 3780 3781 if alias: 3782 if self.dialect.UNNEST_COLUMN_ONLY: 3783 if alias.args.get("columns"): 3784 
self.raise_error("Unexpected extra column alias in unnest.") 3785 3786 alias.set("columns", [alias.this]) 3787 alias.set("this", None) 3788 3789 columns = alias.args.get("columns") or [] 3790 if offset and len(expressions) < len(columns): 3791 offset = columns.pop() 3792 3793 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3794 self._match(TokenType.ALIAS) 3795 offset = self._parse_id_var( 3796 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3797 ) or exp.to_identifier("offset") 3798 3799 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3800 3801 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3802 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3803 if not is_derived and not ( 3804 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3805 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3806 ): 3807 return None 3808 3809 expressions = self._parse_csv(self._parse_value) 3810 alias = self._parse_table_alias() 3811 3812 if is_derived: 3813 self._match_r_paren() 3814 3815 return self.expression( 3816 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3817 ) 3818 3819 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3820 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3821 as_modifier and self._match_text_seq("USING", "SAMPLE") 3822 ): 3823 return None 3824 3825 bucket_numerator = None 3826 bucket_denominator = None 3827 bucket_field = None 3828 percent = None 3829 size = None 3830 seed = None 3831 3832 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3833 matched_l_paren = self._match(TokenType.L_PAREN) 3834 3835 if self.TABLESAMPLE_CSV: 3836 num = None 3837 expressions = self._parse_csv(self._parse_primary) 3838 else: 3839 expressions = None 3840 num = ( 3841 self._parse_factor() 3842 if self._match(TokenType.NUMBER, advance=False) 3843 else 
self._parse_primary() or self._parse_placeholder() 3844 ) 3845 3846 if self._match_text_seq("BUCKET"): 3847 bucket_numerator = self._parse_number() 3848 self._match_text_seq("OUT", "OF") 3849 bucket_denominator = bucket_denominator = self._parse_number() 3850 self._match(TokenType.ON) 3851 bucket_field = self._parse_field() 3852 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3853 percent = num 3854 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3855 size = num 3856 else: 3857 percent = num 3858 3859 if matched_l_paren: 3860 self._match_r_paren() 3861 3862 if self._match(TokenType.L_PAREN): 3863 method = self._parse_var(upper=True) 3864 seed = self._match(TokenType.COMMA) and self._parse_number() 3865 self._match_r_paren() 3866 elif self._match_texts(("SEED", "REPEATABLE")): 3867 seed = self._parse_wrapped(self._parse_number) 3868 3869 if not method and self.DEFAULT_SAMPLING_METHOD: 3870 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3871 3872 return self.expression( 3873 exp.TableSample, 3874 expressions=expressions, 3875 method=method, 3876 bucket_numerator=bucket_numerator, 3877 bucket_denominator=bucket_denominator, 3878 bucket_field=bucket_field, 3879 percent=percent, 3880 size=size, 3881 seed=seed, 3882 ) 3883 3884 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3885 return list(iter(self._parse_pivot, None)) or None 3886 3887 def _parse_joins(self) -> t.Iterator[exp.Join]: 3888 return iter(self._parse_join, None) 3889 3890 # https://duckdb.org/docs/sql/statements/pivot 3891 def _parse_simplified_pivot(self) -> exp.Pivot: 3892 def _parse_on() -> t.Optional[exp.Expression]: 3893 this = self._parse_bitwise() 3894 return self._parse_in(this) if self._match(TokenType.IN) else this 3895 3896 this = self._parse_table() 3897 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3898 using = self._match(TokenType.USING) and self._parse_csv( 3899 lambda: self._parse_alias(self._parse_function()) 
    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the FOR <col> IN (...) part of a PIVOT clause."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    # A bare column used as an alias collapses to its identifier
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse one PIVOT/UNPIVOT clause; None (with retreat) if the tokens don't form one."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Pre-compute the output column names produced by the pivot
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Derive output column name fragments from the pivot's aggregation aliases."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse PREWHERE clause; None if absent."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, accumulating plain expressions, ROLLUP/CUBE/GROUPING SETS
        and WITH TOTALS into an exp.Group.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # Only WITH (or nothing) was consumed: it belongs to a following clause
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No progress means there is nothing left to parse
            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Build a Rollup/Cube node; the WITH-prefixed form carries no expression list."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one element of GROUPING SETS: a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())
_parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4088 if not skip_having_token and not self._match(TokenType.HAVING): 4089 return None 4090 return self.expression(exp.Having, this=self._parse_assignment()) 4091 4092 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4093 if not self._match(TokenType.QUALIFY): 4094 return None 4095 return self.expression(exp.Qualify, this=self._parse_assignment()) 4096 4097 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4098 if skip_start_token: 4099 start = None 4100 elif self._match(TokenType.START_WITH): 4101 start = self._parse_assignment() 4102 else: 4103 return None 4104 4105 self._match(TokenType.CONNECT_BY) 4106 nocycle = self._match_text_seq("NOCYCLE") 4107 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4108 exp.Prior, this=self._parse_bitwise() 4109 ) 4110 connect = self._parse_assignment() 4111 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4112 4113 if not start and self._match(TokenType.START_WITH): 4114 start = self._parse_assignment() 4115 4116 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4117 4118 def _parse_name_as_expression(self) -> exp.Alias: 4119 return self.expression( 4120 exp.Alias, 4121 alias=self._parse_id_var(any_token=True), 4122 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4123 ) 4124 4125 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4126 if self._match_text_seq("INTERPOLATE"): 4127 return self._parse_wrapped_csv(self._parse_name_as_expression) 4128 return None 4129 4130 def _parse_order( 4131 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4132 ) -> t.Optional[exp.Expression]: 4133 siblings = None 4134 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4135 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4136 return this 4137 4138 siblings = True 4139 4140 return self.expression( 4141 
exp.Order, 4142 this=this, 4143 expressions=self._parse_csv(self._parse_ordered), 4144 siblings=siblings, 4145 ) 4146 4147 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4148 if not self._match(token): 4149 return None 4150 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4151 4152 def _parse_ordered( 4153 self, parse_method: t.Optional[t.Callable] = None 4154 ) -> t.Optional[exp.Ordered]: 4155 this = parse_method() if parse_method else self._parse_assignment() 4156 if not this: 4157 return None 4158 4159 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4160 this = exp.var("ALL") 4161 4162 asc = self._match(TokenType.ASC) 4163 desc = self._match(TokenType.DESC) or (asc and False) 4164 4165 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4166 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4167 4168 nulls_first = is_nulls_first or False 4169 explicitly_null_ordered = is_nulls_first or is_nulls_last 4170 4171 if ( 4172 not explicitly_null_ordered 4173 and ( 4174 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4175 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4176 ) 4177 and self.dialect.NULL_ORDERING != "nulls_are_last" 4178 ): 4179 nulls_first = True 4180 4181 if self._match_text_seq("WITH", "FILL"): 4182 with_fill = self.expression( 4183 exp.WithFill, 4184 **{ # type: ignore 4185 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4186 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4187 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4188 "interpolate": self._parse_interpolate(), 4189 }, 4190 ) 4191 else: 4192 with_fill = None 4193 4194 return self.expression( 4195 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4196 ) 4197 4198 def _parse_limit( 4199 self, 4200 this: t.Optional[exp.Expression] = None, 4201 top: bool = False, 4202 skip_limit_token: bool = False, 
4203 ) -> t.Optional[exp.Expression]: 4204 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4205 comments = self._prev_comments 4206 if top: 4207 limit_paren = self._match(TokenType.L_PAREN) 4208 expression = self._parse_term() if limit_paren else self._parse_number() 4209 4210 if limit_paren: 4211 self._match_r_paren() 4212 else: 4213 expression = self._parse_term() 4214 4215 if self._match(TokenType.COMMA): 4216 offset = expression 4217 expression = self._parse_term() 4218 else: 4219 offset = None 4220 4221 limit_exp = self.expression( 4222 exp.Limit, 4223 this=this, 4224 expression=expression, 4225 offset=offset, 4226 comments=comments, 4227 expressions=self._parse_limit_by(), 4228 ) 4229 4230 return limit_exp 4231 4232 if self._match(TokenType.FETCH): 4233 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4234 direction = self._prev.text.upper() if direction else "FIRST" 4235 4236 count = self._parse_field(tokens=self.FETCH_TOKENS) 4237 percent = self._match(TokenType.PERCENT) 4238 4239 self._match_set((TokenType.ROW, TokenType.ROWS)) 4240 4241 only = self._match_text_seq("ONLY") 4242 with_ties = self._match_text_seq("WITH", "TIES") 4243 4244 if only and with_ties: 4245 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4246 4247 return self.expression( 4248 exp.Fetch, 4249 direction=direction, 4250 count=count, 4251 percent=percent, 4252 with_ties=with_ties, 4253 ) 4254 4255 return this 4256 4257 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4258 if not self._match(TokenType.OFFSET): 4259 return this 4260 4261 count = self._parse_term() 4262 self._match_set((TokenType.ROW, TokenType.ROWS)) 4263 4264 return self.expression( 4265 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4266 ) 4267 4268 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4269 return self._match_text_seq("BY") and 
self._parse_csv(self._parse_bitwise) 4270 4271 def _parse_locks(self) -> t.List[exp.Lock]: 4272 locks = [] 4273 while True: 4274 if self._match_text_seq("FOR", "UPDATE"): 4275 update = True 4276 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4277 "LOCK", "IN", "SHARE", "MODE" 4278 ): 4279 update = False 4280 else: 4281 break 4282 4283 expressions = None 4284 if self._match_text_seq("OF"): 4285 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4286 4287 wait: t.Optional[bool | exp.Expression] = None 4288 if self._match_text_seq("NOWAIT"): 4289 wait = True 4290 elif self._match_text_seq("WAIT"): 4291 wait = self._parse_primary() 4292 elif self._match_text_seq("SKIP", "LOCKED"): 4293 wait = False 4294 4295 locks.append( 4296 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4297 ) 4298 4299 return locks 4300 4301 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4302 while this and self._match_set(self.SET_OPERATIONS): 4303 token_type = self._prev.token_type 4304 4305 if token_type == TokenType.UNION: 4306 operation: t.Type[exp.SetOperation] = exp.Union 4307 elif token_type == TokenType.EXCEPT: 4308 operation = exp.Except 4309 else: 4310 operation = exp.Intersect 4311 4312 comments = self._prev.comments 4313 4314 if self._match(TokenType.DISTINCT): 4315 distinct: t.Optional[bool] = True 4316 elif self._match(TokenType.ALL): 4317 distinct = False 4318 else: 4319 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4320 if distinct is None: 4321 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4322 4323 by_name = self._match_text_seq("BY", "NAME") 4324 expression = self._parse_select(nested=True, parse_set_operation=False) 4325 4326 this = self.expression( 4327 operation, 4328 comments=comments, 4329 this=this, 4330 distinct=distinct, 4331 by_name=by_name, 4332 expression=expression, 4333 ) 4334 4335 if isinstance(this, 
exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4336 expression = this.expression 4337 4338 if expression: 4339 for arg in self.SET_OP_MODIFIERS: 4340 expr = expression.args.get(arg) 4341 if expr: 4342 this.set(arg, expr.pop()) 4343 4344 return this 4345 4346 def _parse_expression(self) -> t.Optional[exp.Expression]: 4347 return self._parse_alias(self._parse_assignment()) 4348 4349 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4350 this = self._parse_disjunction() 4351 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4352 # This allows us to parse <non-identifier token> := <expr> 4353 this = exp.column( 4354 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4355 ) 4356 4357 while self._match_set(self.ASSIGNMENT): 4358 if isinstance(this, exp.Column) and len(this.parts) == 1: 4359 this = this.this 4360 4361 this = self.expression( 4362 self.ASSIGNMENT[self._prev.token_type], 4363 this=this, 4364 comments=self._prev_comments, 4365 expression=self._parse_assignment(), 4366 ) 4367 4368 return this 4369 4370 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4371 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4372 4373 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4374 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4375 4376 def _parse_equality(self) -> t.Optional[exp.Expression]: 4377 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4378 4379 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4380 return self._parse_tokens(self._parse_range, self.COMPARISON) 4381 4382 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4383 this = this or self._parse_bitwise() 4384 negate = self._match(TokenType.NOT) 4385 4386 if self._match_set(self.RANGE_PARSERS): 4387 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4388 if not expression: 4389 return this 4390 4391 
            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # `negate` was set by consuming NOT before the range operator, e.g. x NOT BETWEEN a AND b
        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap `this` in a NOT node; no-op when `this` is None."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the predicate following IS.

        Handles IS [NOT] DISTINCT FROM, IS [NOT] JSON [<kind>] [WITH|WITHOUT [UNIQUE] KEYS],
        and IS [NOT] <primary | NULL>. Returns None (after retreating past the IS token)
        when nothing parseable follows.
        """
        # Index of the IS token itself (it was consumed by the caller), so _retreat
        # can undo consuming it if no valid predicate follows
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # kind is e.g. VALUE/ARRAY/OBJECT/SCALAR (IS_JSON_PREDICATE_KIND), or False if absent
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: an UNNEST, a parenthesized/bracketed list
        or subquery, or a bare field (e.g. Hive's `x IN table`)."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query becomes the `query` arg (IN (SELECT ...)); anything else is a list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `<low> AND <high>` after BETWEEN into a Between node."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an ESCAPE '<char>' clause to `this` if present."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression (or, with match_interval=False, a bare
        '<value>' <unit> continuation of one). Returns None after retreating when
        no valid interval follows."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out on a bare unqualified, unquoted IS column — that's the start of an
        # IS predicate, not an interval value
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not
self._match(TokenType.ALIAS, advance=False) 4502 and self._parse_var(any_token=True, upper=True) 4503 ) 4504 4505 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4506 # each INTERVAL expression into this canonical form so it's easy to transpile 4507 if this and this.is_number: 4508 this = exp.Literal.string(this.to_py()) 4509 elif this and this.is_string: 4510 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4511 if len(parts) == 1: 4512 if unit: 4513 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4514 self._retreat(self._index - 1) 4515 4516 this = exp.Literal.string(parts[0][0]) 4517 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4518 4519 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4520 unit = self.expression( 4521 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4522 ) 4523 4524 interval = self.expression(exp.Interval, this=this, unit=unit) 4525 4526 index = self._index 4527 self._match(TokenType.PLUS) 4528 4529 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4530 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4531 return self.expression( 4532 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4533 ) 4534 4535 self._retreat(index) 4536 return interval 4537 4538 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4539 this = self._parse_term() 4540 4541 while True: 4542 if self._match_set(self.BITWISE): 4543 this = self.expression( 4544 self.BITWISE[self._prev.token_type], 4545 this=this, 4546 expression=self._parse_term(), 4547 ) 4548 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4549 this = self.expression( 4550 exp.DPipe, 4551 this=this, 4552 expression=self._parse_term(), 4553 safe=not self.dialect.STRICT_STRING_CONCAT, 4554 ) 4555 elif self._match(TokenType.DQMARK): 4556 this = self.expression( 4557 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4558 ) 4559 elif self._match_pair(TokenType.LT, TokenType.LT): 4560 this = self.expression( 4561 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4562 ) 4563 elif self._match_pair(TokenType.GT, TokenType.GT): 4564 this = self.expression( 4565 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4566 ) 4567 else: 4568 break 4569 4570 return this 4571 4572 def _parse_term(self) -> t.Optional[exp.Expression]: 4573 this = self._parse_factor() 4574 4575 while self._match_set(self.TERM): 4576 klass = self.TERM[self._prev.token_type] 4577 comments = self._prev_comments 4578 expression = self._parse_factor() 4579 4580 this = self.expression(klass, this=this, comments=comments, expression=expression) 4581 4582 if isinstance(this, exp.Collate): 4583 expr = this.expression 4584 4585 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4586 # fallback to Identifier / Var 4587 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4588 ident = expr.this 4589 if isinstance(ident, 
exp.Identifier): 4590 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4591 4592 return this 4593 4594 def _parse_factor(self) -> t.Optional[exp.Expression]: 4595 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4596 this = parse_method() 4597 4598 while self._match_set(self.FACTOR): 4599 klass = self.FACTOR[self._prev.token_type] 4600 comments = self._prev_comments 4601 expression = parse_method() 4602 4603 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4604 self._retreat(self._index - 1) 4605 return this 4606 4607 this = self.expression(klass, this=this, comments=comments, expression=expression) 4608 4609 if isinstance(this, exp.Div): 4610 this.args["typed"] = self.dialect.TYPED_DIVISION 4611 this.args["safe"] = self.dialect.SAFE_DIVISION 4612 4613 return this 4614 4615 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4616 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4617 4618 def _parse_unary(self) -> t.Optional[exp.Expression]: 4619 if self._match_set(self.UNARY_PARSERS): 4620 return self.UNARY_PARSERS[self._prev.token_type](self) 4621 return self._parse_at_time_zone(self._parse_type()) 4622 4623 def _parse_type( 4624 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4625 ) -> t.Optional[exp.Expression]: 4626 interval = parse_interval and self._parse_interval() 4627 if interval: 4628 return interval 4629 4630 index = self._index 4631 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4632 4633 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
4634 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4635 if isinstance(data_type, exp.Cast): 4636 # This constructor can contain ops directly after it, for instance struct unnesting: 4637 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4638 return self._parse_column_ops(data_type) 4639 4640 if data_type: 4641 index2 = self._index 4642 this = self._parse_primary() 4643 4644 if isinstance(this, exp.Literal): 4645 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4646 if parser: 4647 return parser(self, this, data_type) 4648 4649 return self.expression(exp.Cast, this=this, to=data_type) 4650 4651 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4652 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4653 # 4654 # If the index difference here is greater than 1, that means the parser itself must have 4655 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4656 # 4657 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4658 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4659 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4660 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4661 # 4662 # In these cases, we don't really want to return the converted type, but instead retreat 4663 # and try to parse a Column or Identifier in the section below. 
4664 if data_type.expressions and index2 - index > 1: 4665 self._retreat(index2) 4666 return self._parse_column_ops(data_type) 4667 4668 self._retreat(index) 4669 4670 if fallback_to_identifier: 4671 return self._parse_id_var() 4672 4673 this = self._parse_column() 4674 return this and self._parse_column_ops(this) 4675 4676 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4677 this = self._parse_type() 4678 if not this: 4679 return None 4680 4681 if isinstance(this, exp.Column) and not this.table: 4682 this = exp.var(this.name.upper()) 4683 4684 return self.expression( 4685 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4686 ) 4687 4688 def _parse_types( 4689 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4690 ) -> t.Optional[exp.Expression]: 4691 index = self._index 4692 4693 this: t.Optional[exp.Expression] = None 4694 prefix = self._match_text_seq("SYSUDTLIB", ".") 4695 4696 if not self._match_set(self.TYPE_TOKENS): 4697 identifier = allow_identifiers and self._parse_id_var( 4698 any_token=False, tokens=(TokenType.VAR,) 4699 ) 4700 if isinstance(identifier, exp.Identifier): 4701 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4702 4703 if len(tokens) != 1: 4704 self.raise_error("Unexpected identifier", self._prev) 4705 4706 if tokens[0].token_type in self.TYPE_TOKENS: 4707 self._prev = tokens[0] 4708 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4709 type_name = identifier.name 4710 4711 while self._match(TokenType.DOT): 4712 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4713 4714 this = exp.DataType.build(type_name, udt=True) 4715 else: 4716 self._retreat(self._index - 1) 4717 return None 4718 else: 4719 return None 4720 4721 type_token = self._prev.token_type 4722 4723 if type_token == TokenType.PSEUDO_TYPE: 4724 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4725 4726 if type_token == TokenType.OBJECT_IDENTIFIER: 4727 
return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4728 4729 # https://materialize.com/docs/sql/types/map/ 4730 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4731 key_type = self._parse_types( 4732 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4733 ) 4734 if not self._match(TokenType.FARROW): 4735 self._retreat(index) 4736 return None 4737 4738 value_type = self._parse_types( 4739 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4740 ) 4741 if not self._match(TokenType.R_BRACKET): 4742 self._retreat(index) 4743 return None 4744 4745 return exp.DataType( 4746 this=exp.DataType.Type.MAP, 4747 expressions=[key_type, value_type], 4748 nested=True, 4749 prefix=prefix, 4750 ) 4751 4752 nested = type_token in self.NESTED_TYPE_TOKENS 4753 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4754 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4755 expressions = None 4756 maybe_func = False 4757 4758 if self._match(TokenType.L_PAREN): 4759 if is_struct: 4760 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4761 elif nested: 4762 expressions = self._parse_csv( 4763 lambda: self._parse_types( 4764 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4765 ) 4766 ) 4767 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4768 this = expressions[0] 4769 this.set("nullable", True) 4770 self._match_r_paren() 4771 return this 4772 elif type_token in self.ENUM_TYPE_TOKENS: 4773 expressions = self._parse_csv(self._parse_equality) 4774 elif is_aggregate: 4775 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4776 any_token=False, tokens=(TokenType.VAR,) 4777 ) 4778 if not func_or_ident or not self._match(TokenType.COMMA): 4779 return None 4780 expressions = self._parse_csv( 4781 lambda: self._parse_types( 4782 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4783 ) 
4784 ) 4785 expressions.insert(0, func_or_ident) 4786 else: 4787 expressions = self._parse_csv(self._parse_type_size) 4788 4789 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4790 if type_token == TokenType.VECTOR and len(expressions) == 2: 4791 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4792 4793 if not expressions or not self._match(TokenType.R_PAREN): 4794 self._retreat(index) 4795 return None 4796 4797 maybe_func = True 4798 4799 values: t.Optional[t.List[exp.Expression]] = None 4800 4801 if nested and self._match(TokenType.LT): 4802 if is_struct: 4803 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4804 else: 4805 expressions = self._parse_csv( 4806 lambda: self._parse_types( 4807 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4808 ) 4809 ) 4810 4811 if not self._match(TokenType.GT): 4812 self.raise_error("Expecting >") 4813 4814 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4815 values = self._parse_csv(self._parse_assignment) 4816 if not values and is_struct: 4817 values = None 4818 self._retreat(self._index - 1) 4819 else: 4820 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4821 4822 if type_token in self.TIMESTAMPS: 4823 if self._match_text_seq("WITH", "TIME", "ZONE"): 4824 maybe_func = False 4825 tz_type = ( 4826 exp.DataType.Type.TIMETZ 4827 if type_token in self.TIMES 4828 else exp.DataType.Type.TIMESTAMPTZ 4829 ) 4830 this = exp.DataType(this=tz_type, expressions=expressions) 4831 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4832 maybe_func = False 4833 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4834 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4835 maybe_func = False 4836 elif type_token == TokenType.INTERVAL: 4837 unit = self._parse_var(upper=True) 4838 if unit: 4839 if self._match_text_seq("TO"): 4840 unit = exp.IntervalSpan(this=unit, 
expression=self._parse_var(upper=True)) 4841 4842 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4843 else: 4844 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4845 4846 if maybe_func and check_func: 4847 index2 = self._index 4848 peek = self._parse_string() 4849 4850 if not peek: 4851 self._retreat(index) 4852 return None 4853 4854 self._retreat(index2) 4855 4856 if not this: 4857 if self._match_text_seq("UNSIGNED"): 4858 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4859 if not unsigned_type_token: 4860 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4861 4862 type_token = unsigned_type_token or type_token 4863 4864 this = exp.DataType( 4865 this=exp.DataType.Type[type_token.value], 4866 expressions=expressions, 4867 nested=nested, 4868 prefix=prefix, 4869 ) 4870 4871 # Empty arrays/structs are allowed 4872 if values is not None: 4873 cls = exp.Struct if is_struct else exp.Array 4874 this = exp.cast(cls(expressions=values), this, copy=False) 4875 4876 elif expressions: 4877 this.set("expressions", expressions) 4878 4879 # https://materialize.com/docs/sql/types/list/#type-name 4880 while self._match(TokenType.LIST): 4881 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4882 4883 index = self._index 4884 4885 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4886 matched_array = self._match(TokenType.ARRAY) 4887 4888 while self._curr: 4889 datatype_token = self._prev.token_type 4890 matched_l_bracket = self._match(TokenType.L_BRACKET) 4891 if not matched_l_bracket and not matched_array: 4892 break 4893 4894 matched_array = False 4895 values = self._parse_csv(self._parse_assignment) or None 4896 if ( 4897 values 4898 and not schema 4899 and ( 4900 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4901 ) 4902 ): 4903 # Retreating here means that we should not parse the following 
values as part of the data type, e.g. in DuckDB 4904 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4905 self._retreat(index) 4906 break 4907 4908 this = exp.DataType( 4909 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4910 ) 4911 self._match(TokenType.R_BRACKET) 4912 4913 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4914 converter = self.TYPE_CONVERTERS.get(this.this) 4915 if converter: 4916 this = converter(t.cast(exp.DataType, this)) 4917 4918 return this 4919 4920 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4921 index = self._index 4922 4923 if ( 4924 self._curr 4925 and self._next 4926 and self._curr.token_type in self.TYPE_TOKENS 4927 and self._next.token_type in self.TYPE_TOKENS 4928 ): 4929 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4930 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4931 this = self._parse_id_var() 4932 else: 4933 this = ( 4934 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4935 or self._parse_id_var() 4936 ) 4937 4938 self._match(TokenType.COLON) 4939 4940 if ( 4941 type_required 4942 and not isinstance(this, exp.DataType) 4943 and not self._match_set(self.TYPE_TOKENS, advance=False) 4944 ): 4945 self._retreat(index) 4946 return self._parse_types() 4947 4948 return self._parse_column_def(this) 4949 4950 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4951 if not self._match_text_seq("AT", "TIME", "ZONE"): 4952 return this 4953 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4954 4955 def _parse_column(self) -> t.Optional[exp.Expression]: 4956 this = self._parse_column_reference() 4957 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4958 4959 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4960 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4961 4962 return column 4963 4964 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4965 this = self._parse_field() 4966 if ( 4967 not this 4968 and self._match(TokenType.VALUES, advance=False) 4969 and self.VALUES_FOLLOWED_BY_PAREN 4970 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4971 ): 4972 this = self._parse_id_var() 4973 4974 if isinstance(this, exp.Identifier): 4975 # We bubble up comments from the Identifier to the Column 4976 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4977 4978 return this 4979 4980 def _parse_colon_as_variant_extract( 4981 self, this: t.Optional[exp.Expression] 4982 ) -> t.Optional[exp.Expression]: 4983 casts = [] 4984 json_path = [] 4985 escape = None 4986 4987 while self._match(TokenType.COLON): 4988 start_index = self._index 4989 4990 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4991 path = self._parse_column_ops( 4992 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4993 ) 4994 4995 # The cast :: operator has a lower precedence than the extraction operator :, so 4996 # we rearrange the AST appropriately to avoid casting the JSON path 4997 while isinstance(path, exp.Cast): 4998 casts.append(path.to) 4999 path = path.this 5000 5001 if casts: 5002 dcolon_offset = next( 5003 i 5004 for i, t in enumerate(self._tokens[start_index:]) 5005 if t.token_type == TokenType.DCOLON 5006 ) 5007 end_token = self._tokens[start_index + dcolon_offset - 1] 5008 else: 5009 end_token = self._prev 5010 5011 if path: 5012 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5013 # it'll roundtrip to a string literal in GET_PATH 5014 if isinstance(path, exp.Identifier) and path.quoted: 5015 escape = True 5016 5017 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5018 5019 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5020 # Databricks transforms it back to the colon/dot notation 5021 if json_path: 5022 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5023 5024 if json_path_expr: 5025 json_path_expr.set("escape", escape) 5026 5027 this = self.expression( 5028 exp.JSONExtract, 5029 this=this, 5030 expression=json_path_expr, 5031 variant_extract=True, 5032 ) 5033 5034 while casts: 5035 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5036 5037 return this 5038 5039 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5040 return self._parse_types() 5041 5042 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5043 this = self._parse_bracket(this) 5044 5045 while self._match_set(self.COLUMN_OPERATORS): 5046 op_token = self._prev.token_type 5047 op = 
self.COLUMN_OPERATORS.get(op_token) 5048 5049 if op_token == TokenType.DCOLON: 5050 field = self._parse_dcolon() 5051 if not field: 5052 self.raise_error("Expected type") 5053 elif op and self._curr: 5054 field = self._parse_column_reference() or self._parse_bracket() 5055 else: 5056 field = self._parse_field(any_token=True, anonymous_func=True) 5057 5058 if isinstance(field, exp.Func) and this: 5059 # bigquery allows function calls like x.y.count(...) 5060 # SAFE.SUBSTR(...) 5061 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5062 this = exp.replace_tree( 5063 this, 5064 lambda n: ( 5065 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5066 if n.table 5067 else n.this 5068 ) 5069 if isinstance(n, exp.Column) 5070 else n, 5071 ) 5072 5073 if op: 5074 this = op(self, this, field) 5075 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5076 this = self.expression( 5077 exp.Column, 5078 this=field, 5079 table=this.this, 5080 db=this.args.get("table"), 5081 catalog=this.args.get("db"), 5082 ) 5083 else: 5084 this = self.expression(exp.Dot, this=this, expression=field) 5085 5086 this = self._parse_bracket(this) 5087 5088 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5089 5090 def _parse_primary(self) -> t.Optional[exp.Expression]: 5091 if self._match_set(self.PRIMARY_PARSERS): 5092 token_type = self._prev.token_type 5093 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5094 5095 if token_type == TokenType.STRING: 5096 expressions = [primary] 5097 while self._match(TokenType.STRING): 5098 expressions.append(exp.Literal.string(self._prev.text)) 5099 5100 if len(expressions) > 1: 5101 return self.expression(exp.Concat, expressions=expressions) 5102 5103 return primary 5104 5105 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5106 return exp.Literal.number(f"0.{self._prev.text}") 5107 5108 if 
self._match(TokenType.L_PAREN): 5109 comments = self._prev_comments 5110 query = self._parse_select() 5111 5112 if query: 5113 expressions = [query] 5114 else: 5115 expressions = self._parse_expressions() 5116 5117 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5118 5119 if not this and self._match(TokenType.R_PAREN, advance=False): 5120 this = self.expression(exp.Tuple) 5121 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5122 this = self._parse_subquery(this=this, parse_alias=False) 5123 elif isinstance(this, exp.Subquery): 5124 this = self._parse_subquery( 5125 this=self._parse_set_operations(this), parse_alias=False 5126 ) 5127 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5128 this = self.expression(exp.Tuple, expressions=expressions) 5129 else: 5130 this = self.expression(exp.Paren, this=this) 5131 5132 if this: 5133 this.add_comments(comments) 5134 5135 self._match_r_paren(expression=this) 5136 return this 5137 5138 return None 5139 5140 def _parse_field( 5141 self, 5142 any_token: bool = False, 5143 tokens: t.Optional[t.Collection[TokenType]] = None, 5144 anonymous_func: bool = False, 5145 ) -> t.Optional[exp.Expression]: 5146 if anonymous_func: 5147 field = ( 5148 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5149 or self._parse_primary() 5150 ) 5151 else: 5152 field = self._parse_primary() or self._parse_function( 5153 anonymous=anonymous_func, any_token=any_token 5154 ) 5155 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5156 5157 def _parse_function( 5158 self, 5159 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5160 anonymous: bool = False, 5161 optional_parens: bool = True, 5162 any_token: bool = False, 5163 ) -> t.Optional[exp.Expression]: 5164 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5165 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5166 fn_syntax = False 5167 if ( 5168 
self._match(TokenType.L_BRACE, advance=False) 5169 and self._next 5170 and self._next.text.upper() == "FN" 5171 ): 5172 self._advance(2) 5173 fn_syntax = True 5174 5175 func = self._parse_function_call( 5176 functions=functions, 5177 anonymous=anonymous, 5178 optional_parens=optional_parens, 5179 any_token=any_token, 5180 ) 5181 5182 if fn_syntax: 5183 self._match(TokenType.R_BRACE) 5184 5185 return func 5186 5187 def _parse_function_call( 5188 self, 5189 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5190 anonymous: bool = False, 5191 optional_parens: bool = True, 5192 any_token: bool = False, 5193 ) -> t.Optional[exp.Expression]: 5194 if not self._curr: 5195 return None 5196 5197 comments = self._curr.comments 5198 token_type = self._curr.token_type 5199 this = self._curr.text 5200 upper = this.upper() 5201 5202 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5203 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5204 self._advance() 5205 return self._parse_window(parser(self)) 5206 5207 if not self._next or self._next.token_type != TokenType.L_PAREN: 5208 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5209 self._advance() 5210 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5211 5212 return None 5213 5214 if any_token: 5215 if token_type in self.RESERVED_TOKENS: 5216 return None 5217 elif token_type not in self.FUNC_TOKENS: 5218 return None 5219 5220 self._advance(2) 5221 5222 parser = self.FUNCTION_PARSERS.get(upper) 5223 if parser and not anonymous: 5224 this = parser(self) 5225 else: 5226 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5227 5228 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5229 this = self.expression(subquery_predicate, this=self._parse_select()) 5230 self._match_r_paren() 5231 return this 5232 5233 if functions is None: 5234 functions = self.FUNCTIONS 5235 5236 function = functions.get(upper) 5237 5238 alias = upper 
in self.FUNCTIONS_WITH_ALIASED_ARGS 5239 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5240 5241 if alias: 5242 args = self._kv_to_prop_eq(args) 5243 5244 if function and not anonymous: 5245 if "dialect" in function.__code__.co_varnames: 5246 func = function(args, dialect=self.dialect) 5247 else: 5248 func = function(args) 5249 5250 func = self.validate_expression(func, args) 5251 if not self.dialect.NORMALIZE_FUNCTIONS: 5252 func.meta["name"] = this 5253 5254 this = func 5255 else: 5256 if token_type == TokenType.IDENTIFIER: 5257 this = exp.Identifier(this=this, quoted=True) 5258 this = self.expression(exp.Anonymous, this=this, expressions=args) 5259 5260 if isinstance(this, exp.Expression): 5261 this.add_comments(comments) 5262 5263 self._match_r_paren(this) 5264 return self._parse_window(this) 5265 5266 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5267 return expression 5268 5269 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5270 transformed = [] 5271 5272 for index, e in enumerate(expressions): 5273 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5274 if isinstance(e, exp.Alias): 5275 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5276 5277 if not isinstance(e, exp.PropertyEQ): 5278 e = self.expression( 5279 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5280 ) 5281 5282 if isinstance(e.this, exp.Column): 5283 e.this.replace(e.this.this) 5284 else: 5285 e = self._to_prop_eq(e, index) 5286 5287 transformed.append(e) 5288 5289 return transformed 5290 5291 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5292 return self._parse_column_def(self._parse_id_var()) 5293 5294 def _parse_user_defined_function( 5295 self, kind: t.Optional[TokenType] = None 5296 ) -> t.Optional[exp.Expression]: 5297 this = self._parse_id_var() 5298 5299 while self._match(TokenType.DOT): 5300 this = 
self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """If a literal follows `token`, wrap it in an Introducer keyed by the token's
        text; otherwise treat the token as a plain Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse `[scope.]name` into a SessionParameter; `kind` is the qualifier
        before the dot, when present."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        # A lambda parameter is just an identifier
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Try to parse a lambda, e.g. `x -> expr` or `(x, y) -> expr`; if no lambda
        arrow follows, retreat and parse a DISTINCT list or a select/expression with
        trailing modifiers instead."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            # Unbalanced paren: not a parenthesized arg list, rewind and retry below
            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — undo everything consumed above
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5360 ) 5361 5362 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5363 index = self._index 5364 if not self._match(TokenType.L_PAREN): 5365 return this 5366 5367 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5368 # expr can be of both types 5369 if self._match_set(self.SELECT_START_TOKENS): 5370 self._retreat(index) 5371 return this 5372 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5373 self._match_r_paren() 5374 return self.expression(exp.Schema, this=this, expressions=args) 5375 5376 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5377 return self._parse_column_def(self._parse_field(any_token=True)) 5378 5379 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5380 # column defs are not really columns, they're identifiers 5381 if isinstance(this, exp.Column): 5382 this = this.this 5383 5384 kind = self._parse_types(schema=True) 5385 5386 if self._match_text_seq("FOR", "ORDINALITY"): 5387 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5388 5389 constraints: t.List[exp.Expression] = [] 5390 5391 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5392 ("ALIAS", "MATERIALIZED") 5393 ): 5394 persisted = self._prev.text.upper() == "MATERIALIZED" 5395 constraint_kind = exp.ComputedColumnConstraint( 5396 this=self._parse_assignment(), 5397 persisted=persisted or self._match_text_seq("PERSISTED"), 5398 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5399 ) 5400 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5401 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5402 self._match(TokenType.ALIAS) 5403 constraints.append( 5404 self.expression( 5405 exp.ColumnConstraint, 5406 
kind=exp.TransformColumnConstraint(this=self._parse_field()), 5407 ) 5408 ) 5409 5410 while True: 5411 constraint = self._parse_column_constraint() 5412 if not constraint: 5413 break 5414 constraints.append(constraint) 5415 5416 if not kind and not constraints: 5417 return this 5418 5419 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5420 5421 def _parse_auto_increment( 5422 self, 5423 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5424 start = None 5425 increment = None 5426 5427 if self._match(TokenType.L_PAREN, advance=False): 5428 args = self._parse_wrapped_csv(self._parse_bitwise) 5429 start = seq_get(args, 0) 5430 increment = seq_get(args, 1) 5431 elif self._match_text_seq("START"): 5432 start = self._parse_bitwise() 5433 self._match_text_seq("INCREMENT") 5434 increment = self._parse_bitwise() 5435 5436 if start and increment: 5437 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5438 5439 return exp.AutoIncrementColumnConstraint() 5440 5441 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5442 if not self._match_text_seq("REFRESH"): 5443 self._retreat(self._index - 1) 5444 return None 5445 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5446 5447 def _parse_compress(self) -> exp.CompressColumnConstraint: 5448 if self._match(TokenType.L_PAREN, advance=False): 5449 return self.expression( 5450 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5451 ) 5452 5453 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5454 5455 def _parse_generated_as_identity( 5456 self, 5457 ) -> ( 5458 exp.GeneratedAsIdentityColumnConstraint 5459 | exp.ComputedColumnConstraint 5460 | exp.GeneratedAsRowColumnConstraint 5461 ): 5462 if self._match_text_seq("BY", "DEFAULT"): 5463 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5464 this = self.expression( 5465 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5466 ) 5467 else: 5468 self._match_text_seq("ALWAYS") 5469 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5470 5471 self._match(TokenType.ALIAS) 5472 5473 if self._match_text_seq("ROW"): 5474 start = self._match_text_seq("START") 5475 if not start: 5476 self._match(TokenType.END) 5477 hidden = self._match_text_seq("HIDDEN") 5478 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5479 5480 identity = self._match_text_seq("IDENTITY") 5481 5482 if self._match(TokenType.L_PAREN): 5483 if self._match(TokenType.START_WITH): 5484 this.set("start", self._parse_bitwise()) 5485 if self._match_text_seq("INCREMENT", "BY"): 5486 this.set("increment", self._parse_bitwise()) 5487 if self._match_text_seq("MINVALUE"): 5488 this.set("minvalue", self._parse_bitwise()) 5489 if self._match_text_seq("MAXVALUE"): 5490 this.set("maxvalue", self._parse_bitwise()) 5491 5492 if self._match_text_seq("CYCLE"): 5493 this.set("cycle", True) 5494 elif self._match_text_seq("NO", "CYCLE"): 5495 this.set("cycle", False) 5496 5497 if not identity: 5498 this.set("expression", self._parse_range()) 5499 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5500 args = self._parse_csv(self._parse_bitwise) 5501 this.set("start", seq_get(args, 0)) 5502 this.set("increment", seq_get(args, 1)) 5503 5504 self._match_r_paren() 5505 5506 return this 5507 5508 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5509 self._match_text_seq("LENGTH") 5510 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5511 5512 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5513 if self._match_text_seq("NULL"): 5514 return self.expression(exp.NotNullColumnConstraint) 5515 if self._match_text_seq("CASESPECIFIC"): 5516 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5517 if 
self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via `CONSTRAINT <name>`.

        Returns an `exp.ColumnConstraint`, the bare name when only
        `CONSTRAINT <name>` was present, or None when nothing matched.
        """
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        # Dispatch on the constraint keyword via CONSTRAINT_PARSERS
        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a schema constraint, named (`CONSTRAINT <name> ...`) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in `constraints`
        (defaults to `self.CONSTRAINT_PARSERS`); an IDENTIFIER token is never
        treated as a constraint keyword.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        # Overridable hook: the name/target of a UNIQUE KEY constraint
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return 
self.expression( 5579 exp.UniqueColumnConstraint, 5580 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5581 this=self._parse_schema(self._parse_unique_key()), 5582 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5583 on_conflict=self._parse_on_conflict(), 5584 ) 5585 5586 def _parse_key_constraint_options(self) -> t.List[str]: 5587 options = [] 5588 while True: 5589 if not self._curr: 5590 break 5591 5592 if self._match(TokenType.ON): 5593 action = None 5594 on = self._advance_any() and self._prev.text 5595 5596 if self._match_text_seq("NO", "ACTION"): 5597 action = "NO ACTION" 5598 elif self._match_text_seq("CASCADE"): 5599 action = "CASCADE" 5600 elif self._match_text_seq("RESTRICT"): 5601 action = "RESTRICT" 5602 elif self._match_pair(TokenType.SET, TokenType.NULL): 5603 action = "SET NULL" 5604 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5605 action = "SET DEFAULT" 5606 else: 5607 self.raise_error("Invalid key constraint") 5608 5609 options.append(f"ON {on} {action}") 5610 else: 5611 var = self._parse_var_from_options( 5612 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5613 ) 5614 if not var: 5615 break 5616 options.append(var.name) 5617 5618 return options 5619 5620 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5621 if match and not self._match(TokenType.REFERENCES): 5622 return None 5623 5624 expressions = None 5625 this = self._parse_table(schema=True) 5626 options = self._parse_key_constraint_options() 5627 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5628 5629 def _parse_foreign_key(self) -> exp.ForeignKey: 5630 expressions = self._parse_wrapped_id_vars() 5631 reference = self._parse_references() 5632 options = {} 5633 5634 while self._match(TokenType.ON): 5635 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5636 self.raise_error("Expected DELETE or UPDATE") 5637 5638 kind = self._prev.text.lower() 5639 
            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Fall back to the next token's text as the action name
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Overridable hook: one column/expression inside PRIMARY KEY (...)
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse a `PERIOD FOR SYSTEM_TIME (<start>, <end>)` table constraint."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            # Not a SYSTEM_TIME period - un-consume the previously matched token
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse a PRIMARY KEY constraint.

        Without a following parenthesized column list this is a column-level
        `exp.PrimaryKeyColumnConstraint`; otherwise it is a table-level
        `exp.PrimaryKey` with its key parts and options.
        """
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # One entry inside brackets/braces: an (optionally aliased) expression or slice
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. 
We parse the column into the corresponding 5696 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5697 same as we did for `DATE('yyyy-mm-dd')`. 5698 5699 Reference: 5700 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5701 """ 5702 self._match(TokenType.VAR) 5703 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5704 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5705 if not self._match(TokenType.R_BRACE): 5706 self.raise_error("Expected }") 5707 return expression 5708 5709 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5710 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5711 return this 5712 5713 bracket_kind = self._prev.token_type 5714 if ( 5715 bracket_kind == TokenType.L_BRACE 5716 and self._curr 5717 and self._curr.token_type == TokenType.VAR 5718 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5719 ): 5720 return self._parse_odbc_datetime_literal() 5721 5722 expressions = self._parse_csv( 5723 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5724 ) 5725 5726 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5727 self.raise_error("Expected ]") 5728 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5729 self.raise_error("Expected }") 5730 5731 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5732 if bracket_kind == TokenType.L_BRACE: 5733 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5734 elif not this: 5735 this = build_array_constructor( 5736 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5737 ) 5738 else: 5739 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5740 if constructor_type: 5741 return build_array_constructor( 5742 constructor_type, 5743 
args=expressions, 5744 bracket_kind=bracket_kind, 5745 dialect=self.dialect, 5746 ) 5747 5748 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5749 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5750 5751 self._add_comments(this) 5752 return self._parse_bracket(this) 5753 5754 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5755 if self._match(TokenType.COLON): 5756 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5757 return this 5758 5759 def _parse_case(self) -> t.Optional[exp.Expression]: 5760 ifs = [] 5761 default = None 5762 5763 comments = self._prev_comments 5764 expression = self._parse_assignment() 5765 5766 while self._match(TokenType.WHEN): 5767 this = self._parse_assignment() 5768 self._match(TokenType.THEN) 5769 then = self._parse_assignment() 5770 ifs.append(self.expression(exp.If, this=this, true=then)) 5771 5772 if self._match(TokenType.ELSE): 5773 default = self._parse_assignment() 5774 5775 if not self._match(TokenType.END): 5776 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5777 default = exp.column("interval") 5778 else: 5779 self.raise_error("Expected END after CASE", self._prev) 5780 5781 return self.expression( 5782 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5783 ) 5784 5785 def _parse_if(self) -> t.Optional[exp.Expression]: 5786 if self._match(TokenType.L_PAREN): 5787 args = self._parse_csv(self._parse_assignment) 5788 this = self.validate_expression(exp.If.from_arg_list(args), args) 5789 self._match_r_paren() 5790 else: 5791 index = self._index - 1 5792 5793 if self.NO_PAREN_IF_COMMANDS and index == 0: 5794 return self._parse_as_command(self._prev) 5795 5796 condition = self._parse_assignment() 5797 5798 if not condition: 5799 self._retreat(index) 5800 return None 5801 5802 self._match(TokenType.THEN) 5803 true = self._parse_assignment() 5804 false = 
self._parse_assignment() if self._match(TokenType.ELSE) else None 5805 self._match(TokenType.END) 5806 this = self.expression(exp.If, this=condition, true=true, false=false) 5807 5808 return this 5809 5810 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5811 if not self._match_text_seq("VALUE", "FOR"): 5812 self._retreat(self._index - 1) 5813 return None 5814 5815 return self.expression( 5816 exp.NextValueFor, 5817 this=self._parse_column(), 5818 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5819 ) 5820 5821 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5822 this = self._parse_function() or self._parse_var_or_string(upper=True) 5823 5824 if self._match(TokenType.FROM): 5825 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5826 5827 if not self._match(TokenType.COMMA): 5828 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5829 5830 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5831 5832 def _parse_gap_fill(self) -> exp.GapFill: 5833 self._match(TokenType.TABLE) 5834 this = self._parse_table() 5835 5836 self._match(TokenType.COMMA) 5837 args = [this, *self._parse_csv(self._parse_lambda)] 5838 5839 gap_fill = exp.GapFill.from_arg_list(args) 5840 return self.validate_expression(gap_fill, args) 5841 5842 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5843 this = self._parse_assignment() 5844 5845 if not self._match(TokenType.ALIAS): 5846 if self._match(TokenType.COMMA): 5847 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5848 5849 self.raise_error("Expected AS after CAST") 5850 5851 fmt = None 5852 to = self._parse_types() 5853 5854 if self._match(TokenType.FORMAT): 5855 fmt_string = self._parse_string() 5856 fmt = self._parse_at_time_zone(fmt_string) 5857 5858 if not to: 5859 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5860 if to.this in 
exp.DataType.TEMPORAL_TYPES: 5861 this = self.expression( 5862 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5863 this=this, 5864 format=exp.Literal.string( 5865 format_time( 5866 fmt_string.this if fmt_string else "", 5867 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5868 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5869 ) 5870 ), 5871 safe=safe, 5872 ) 5873 5874 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5875 this.set("zone", fmt.args["zone"]) 5876 return this 5877 elif not to: 5878 self.raise_error("Expected TYPE after CAST") 5879 elif isinstance(to, exp.Identifier): 5880 to = exp.DataType.build(to.name, udt=True) 5881 elif to.this == exp.DataType.Type.CHAR: 5882 if self._match(TokenType.CHARACTER_SET): 5883 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5884 5885 return self.expression( 5886 exp.Cast if strict else exp.TryCast, 5887 this=this, 5888 to=to, 5889 format=fmt, 5890 safe=safe, 5891 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5892 ) 5893 5894 def _parse_string_agg(self) -> exp.Expression: 5895 if self._match(TokenType.DISTINCT): 5896 args: t.List[t.Optional[exp.Expression]] = [ 5897 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5898 ] 5899 if self._match(TokenType.COMMA): 5900 args.extend(self._parse_csv(self._parse_assignment)) 5901 else: 5902 args = self._parse_csv(self._parse_assignment) # type: ignore 5903 5904 index = self._index 5905 if not self._match(TokenType.R_PAREN) and args: 5906 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5907 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5908 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5909 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5910 5911 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5912 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5913 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5914 if not self._match_text_seq("WITHIN", "GROUP"): 5915 self._retreat(index) 5916 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5917 5918 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5919 order = self._parse_order(this=seq_get(args, 0)) 5920 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5921 5922 def _parse_convert( 5923 self, strict: bool, safe: t.Optional[bool] = None 5924 ) -> t.Optional[exp.Expression]: 5925 this = self._parse_bitwise() 5926 5927 if self._match(TokenType.USING): 5928 to: t.Optional[exp.Expression] = self.expression( 5929 exp.CharacterSet, this=self._parse_var() 5930 ) 5931 elif self._match(TokenType.COMMA): 5932 to = self._parse_types() 5933 else: 5934 to = None 5935 5936 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5937 5938 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5939 """ 5940 There are generally two variants of the DECODE function: 5941 5942 - DECODE(bin, charset) 5943 - DECODE(expression, search, result [, search, result] ... [, default]) 5944 5945 The second variant will always be parsed into a CASE expression. Note that NULL 5946 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5947 instead of relying on pattern matching. 
5948 """ 5949 args = self._parse_csv(self._parse_assignment) 5950 5951 if len(args) < 3: 5952 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5953 5954 expression, *expressions = args 5955 if not expression: 5956 return None 5957 5958 ifs = [] 5959 for search, result in zip(expressions[::2], expressions[1::2]): 5960 if not search or not result: 5961 return None 5962 5963 if isinstance(search, exp.Literal): 5964 ifs.append( 5965 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5966 ) 5967 elif isinstance(search, exp.Null): 5968 ifs.append( 5969 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5970 ) 5971 else: 5972 cond = exp.or_( 5973 exp.EQ(this=expression.copy(), expression=search), 5974 exp.and_( 5975 exp.Is(this=expression.copy(), expression=exp.Null()), 5976 exp.Is(this=search.copy(), expression=exp.Null()), 5977 copy=False, 5978 ), 5979 copy=False, 5980 ) 5981 ifs.append(exp.If(this=cond, true=result)) 5982 5983 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5984 5985 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5986 self._match_text_seq("KEY") 5987 key = self._parse_column() 5988 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5989 self._match_text_seq("VALUE") 5990 value = self._parse_bitwise() 5991 5992 if not key and not value: 5993 return None 5994 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5995 5996 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5997 if not this or not self._match_text_seq("FORMAT", "JSON"): 5998 return this 5999 6000 return self.expression(exp.FormatJson, this=this) 6001 6002 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6003 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        # Nothing matched: there is no ON-condition clause at all
        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        # Returns the matched "X ON Y" string, the parsed DEFAULT expression, or None
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

        # DEFAULT <expr> was not followed by `ON <on>` - backtrack
        self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse the arguments of JSON_OBJECT(...) / JSON_OBJECTAGG(...).

        Handles `*`, KEY/VALUE pairs, NULL/ABSENT ON NULL handling,
        WITH[OUT] UNIQUE [KEYS], RETURNING <type> [FORMAT JSON] and
        ENCODING <var>.
        """
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS(...) clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED [PATH <path>] COLUMNS(...): no name/type, nested schema instead
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a `COLUMNS(<json column defs>)` schema clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<expr> [, <path>] [ON ERROR/EMPTY handling] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and 
self._parse_string() 6110 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6111 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6112 schema = self._parse_json_schema() 6113 6114 return exp.JSONTable( 6115 this=this, 6116 schema=schema, 6117 path=path, 6118 error_handling=error_handling, 6119 empty_handling=empty_handling, 6120 ) 6121 6122 def _parse_match_against(self) -> exp.MatchAgainst: 6123 expressions = self._parse_csv(self._parse_column) 6124 6125 self._match_text_seq(")", "AGAINST", "(") 6126 6127 this = self._parse_string() 6128 6129 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6130 modifier = "IN NATURAL LANGUAGE MODE" 6131 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6132 modifier = f"{modifier} WITH QUERY EXPANSION" 6133 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6134 modifier = "IN BOOLEAN MODE" 6135 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6136 modifier = "WITH QUERY EXPANSION" 6137 else: 6138 modifier = None 6139 6140 return self.expression( 6141 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6142 ) 6143 6144 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6145 def _parse_open_json(self) -> exp.OpenJSON: 6146 this = self._parse_bitwise() 6147 path = self._match(TokenType.COMMA) and self._parse_string() 6148 6149 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6150 this = self._parse_field(any_token=True) 6151 kind = self._parse_types() 6152 path = self._parse_string() 6153 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6154 6155 return self.expression( 6156 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6157 ) 6158 6159 expressions = None 6160 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6161 self._match_l_paren() 6162 expressions = self._parse_csv(_parse_open_json_column_def) 6163 6164 return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS-style arguments into an `exp.StrPosition`.

        Supports both the ANSI `POSITION(<substr> IN <string>)` form and the
        comma-separated form; `haystack_first` controls which argument of the
        latter is the string being searched.
        """
        args = self._parse_csv(self._parse_bitwise)

        # ANSI form: POSITION(needle IN haystack)
        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse `PREDICT(MODEL <table>, TABLE <table> [, <params>])` arguments."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function into an `exp.JoinHint`."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            # `FOR <len>` with no FROM implies a start position of 1
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a `WINDOW w AS (...), ...` clause into a list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX|MIN <column>` into exp.HavingMax."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # True for MAX (anything that is not MIN is treated as MAX)
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes (FILTER, WITHIN GROUP, OVER ...) around `this`.

        With `alias=True`, parses a named-window definition (`name AS (...)`)
        instead of an OVER clause.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Move the IGNORE/RESPECT NULLS wrapper outside the aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # `OVER window_name` form: reference to a named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias (or parenthesized alias list) for `this`.

        With `explicit=True`, only an AS-introduced alias is accepted.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or accept (almost) any token as one.

        Falls back to `tokens` (default ID_VAR_TOKENS) when `any_token` is False.
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression
self._parse_identifier() 6435 if not expression and ( 6436 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6437 ): 6438 quoted = self._prev.token_type == TokenType.STRING 6439 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6440 6441 return expression 6442 6443 def _parse_string(self) -> t.Optional[exp.Expression]: 6444 if self._match_set(self.STRING_PARSERS): 6445 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6446 return self._parse_placeholder() 6447 6448 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6449 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6450 6451 def _parse_number(self) -> t.Optional[exp.Expression]: 6452 if self._match_set(self.NUMERIC_PARSERS): 6453 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6454 return self._parse_placeholder() 6455 6456 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6457 if self._match(TokenType.IDENTIFIER): 6458 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6459 return self._parse_placeholder() 6460 6461 def _parse_var( 6462 self, 6463 any_token: bool = False, 6464 tokens: t.Optional[t.Collection[TokenType]] = None, 6465 upper: bool = False, 6466 ) -> t.Optional[exp.Expression]: 6467 if ( 6468 (any_token and self._advance_any()) 6469 or self._match(TokenType.VAR) 6470 or (self._match_set(tokens) if tokens else False) 6471 ): 6472 return self.expression( 6473 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6474 ) 6475 return self._parse_placeholder() 6476 6477 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6478 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6479 self._advance() 6480 return self._prev 6481 return None 6482 6483 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6484 
    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, falling back to any token as a Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, else try a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, else try a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, else try a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse the body of a parameter reference into exp.Parameter."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (e.g. `?`, named parameter) via PLACEHOLDER_PARSERS."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined to produce a node, so put the token back
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star modifier (e.g. EXCEPT/REPLACE/RENAME) and its arguments."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list via `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach the separator's comments to the preceding item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over `parse_method` results."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse `( a, b, ... )`; the parentheses may be absent when `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses (required unless `optional`)."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, else an expression/assignment with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse `BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]`."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens, e.g. READ ONLY
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse `REFRESH [TABLE] <name-or-string>`."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one `ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST|AFTER col]`."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            # NOTE(review): `expression` may be None here when no field def was
            # parsed, which would raise AttributeError — confirm intended behavior
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action, defaulting its kind to COLUMN when unset."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop
    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse `PARTITION (...), ...` of an ALTER TABLE ... DROP statement."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of an ALTER TABLE statement."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # Single leading ADD covering a (possibly wrapped) column list
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse the ALTER [COLUMN] action of an ALTER TABLE statement."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fall through: `[SET DATA] [TYPE] <dtype> [COLLATE ...] [USING ...]`
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse `ALTER DISTSTYLE { ALL | EVEN | AUTO | KEY DISTKEY col }` (Redshift)."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse `ALTER [COMPOUND] SORTKEY { (cols) | AUTO | NONE }` (Redshift)."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of an ALTER TABLE statement."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse `RENAME { COLUMN old TO new | TO new_table }` of ALTER TABLE."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the SET action of an ALTER TABLE statement.

        The accepted forms are dialect-specific (T-SQL, Postgres, Snowflake,
        Hive, ...); each branch fills a different arg of exp.AlterSet.
        """
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Hive-style fallback: optional SERDE plus property list
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set
    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement; fall back to a raw Command when unsupported."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build an Alter node when everything was consumed cleanly
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse `MERGE INTO target USING source ON cond WHEN ... [RETURNING ...]`."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the `WHEN [NOT] MATCHED [BY ...] THEN ...` branches of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None otherwise
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, else fall back to a Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` (or `name TO value`) item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        # Normalize bare names on the right-hand side into Vars
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse `SET [GLOBAL|SESSION] TRANSACTION <characteristics>`."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via SET_PARSERS, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option from `options` into an exp.Var.

        `options` maps a leading keyword to its allowed keyword continuations.
        On failure, either raise or retreat and return None per `raise_unmatched`.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
6990 else: 6991 if continuations or continuations is None: 6992 if raise_unmatched: 6993 self.raise_error(f"Unknown option {option}") 6994 6995 self._retreat(index) 6996 return None 6997 6998 return exp.var(option) 6999 7000 def _parse_as_command(self, start: Token) -> exp.Command: 7001 while self._curr: 7002 self._advance() 7003 text = self._find_sql(start, self._prev) 7004 size = len(start.text) 7005 self._warn_unsupported() 7006 return exp.Command(this=text[:size], expression=text[size:]) 7007 7008 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7009 settings = [] 7010 7011 self._match_l_paren() 7012 kind = self._parse_id_var() 7013 7014 if self._match(TokenType.L_PAREN): 7015 while True: 7016 key = self._parse_id_var() 7017 value = self._parse_primary() 7018 7019 if not key and value is None: 7020 break 7021 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7022 self._match(TokenType.R_PAREN) 7023 7024 self._match_r_paren() 7025 7026 return self.expression( 7027 exp.DictProperty, 7028 this=this, 7029 kind=kind.this if kind else None, 7030 settings=settings, 7031 ) 7032 7033 def _parse_dict_range(self, this: str) -> exp.DictRange: 7034 self._match_l_paren() 7035 has_min = self._match_text_seq("MIN") 7036 if has_min: 7037 min = self._parse_var() or self._parse_primary() 7038 self._match_text_seq("MAX") 7039 max = self._parse_var() or self._parse_primary() 7040 else: 7041 max = self._parse_var() or self._parse_primary() 7042 min = exp.Literal.number(0) 7043 self._match_r_paren() 7044 return self.expression(exp.DictRange, this=this, min=min, max=max) 7045 7046 def _parse_comprehension( 7047 self, this: t.Optional[exp.Expression] 7048 ) -> t.Optional[exp.Comprehension]: 7049 index = self._index 7050 expression = self._parse_column() 7051 if not self._match(TokenType.IN): 7052 self._retreat(index - 1) 7053 return None 7054 iterator = self._parse_column() 7055 condition = self._parse_assignment() if self._match_text_seq("IF") 
    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string: a single token or a `$tag$ ... $tag$` span."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # A named tag must be followed by its closing `$`
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser for the longest matching (multi-word) keyword via a trie."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match (and by default consume) the current token if it is `token_type`.

        Any comments on the consumed token are attached to `expression`.
        Returns True on a match, None otherwise.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None
    def _match_set(self, types, advance=True):
        """Match the current token if its type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the next two tokens against the given pair of token types."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `(`, raising a parse error if it is absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `)`, raising a parse error if it is absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current non-string token if its upper-cased text is in `texts`."""
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of keyword texts, retreating fully on any mismatch."""
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite references to lambda parameters inside `node`.

        Columns whose first part names a lambda parameter are replaced by a
        bare identifier/dotted path, wrapped in a Cast when the parameter
        declares a type (its `to` arg).
        """
        if not node:
            return node

        # Map parameter name -> declared type (False when untyped)
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] ...; falls back to the TRUNCATE() function."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse `<ordered expr> [WITH <operator>]` (e.g. index expressions)."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `[=] ( key = value, ... )` option list into properties."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse COPY statement parameters (dialect-dependent shapes)."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options
    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse the credential-related clauses of a COPY statement.

        Recognizes STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION, IAM_ROLE and
        REGION clauses; any clause that is absent is simply left unset on the
        returned Credentials node.
        """
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Delegates to _parse_field; presumably a hook for dialects to override
        # with dialect-specific file-location grammar -- confirm against subclasses.
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY [INTO] statement.

        Falls back to re-parsing the statement as a generic Command when any
        tokens are left unconsumed (i.e. this parser didn't fully understand it).
        """
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True when FROM (or neither FROM nor TO) follows, False for TO
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        # NORMALIZE(<expr> [, <form>]) -- the normalization form is optional and
        # only parsed when a comma follows the first argument
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> exp.Star | exp.UnpackColumns:
        """Parse `*` with its optional EXCEPT/EXCLUDE, REPLACE and RENAME
        modifiers, or a COLUMNS(...) unpacking expression."""
        if self._match_text_seq("COLUMNS", "(", advance=False):
            return exp.UnpackColumns(this=self._parse_function())

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one (possibly multi-word) privilege with an optional
        parenthesized column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee, optionally prefixed by ROLE or GROUP."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse a GRANT statement.

        Falls back to a generic Command when the securable can't be parsed,
        when TO is missing, or when trailing tokens remain unconsumed.
        """
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1356 def __init__( 1357 self, 1358 error_level: t.Optional[ErrorLevel] = None, 1359 error_message_context: int = 100, 1360 max_errors: int = 3, 1361 dialect: DialectType = None, 1362 ): 1363 from sqlglot.dialects import Dialect 1364 1365 self.error_level = error_level or ErrorLevel.IMMEDIATE 1366 self.error_message_context = error_message_context 1367 self.max_errors = max_errors 1368 self.dialect = Dialect.get_or_raise(dialect) 1369 self.reset()
1381 def parse( 1382 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1383 ) -> t.List[t.Optional[exp.Expression]]: 1384 """ 1385 Parses a list of tokens and returns a list of syntax trees, one tree 1386 per parsed SQL statement. 1387 1388 Args: 1389 raw_tokens: The list of tokens. 1390 sql: The original SQL string, used to produce helpful debug messages. 1391 1392 Returns: 1393 The list of the produced syntax trees. 1394 """ 1395 return self._parse( 1396 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1397 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1399 def parse_into( 1400 self, 1401 expression_types: exp.IntoType, 1402 raw_tokens: t.List[Token], 1403 sql: t.Optional[str] = None, 1404 ) -> t.List[t.Optional[exp.Expression]]: 1405 """ 1406 Parses a list of tokens into a given Expression type. If a collection of Expression 1407 types is given instead, this method will try to parse the token list into each one 1408 of them, stopping at the first for which the parsing succeeds. 1409 1410 Args: 1411 expression_types: The expression type(s) to try and parse the token list into. 1412 raw_tokens: The list of tokens. 1413 sql: The original SQL string, used to produce helpful debug messages. 1414 1415 Returns: 1416 The target Expression. 1417 """ 1418 errors = [] 1419 for expression_type in ensure_list(expression_types): 1420 parser = self.EXPRESSION_PARSERS.get(expression_type) 1421 if not parser: 1422 raise TypeError(f"No parser registered for {expression_type}") 1423 1424 try: 1425 return self._parse(parser, raw_tokens, sql) 1426 except ParseError as e: 1427 e.errors[0]["into_expression"] = expression_type 1428 errors.append(e) 1429 1430 raise ParseError( 1431 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1432 errors=merge_errors(errors), 1433 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1473 def check_errors(self) -> None: 1474 """Logs or raises any found errors, depending on the chosen error level setting.""" 1475 if self.error_level == ErrorLevel.WARN: 1476 for error in self.errors: 1477 logger.error(str(error)) 1478 elif self.error_level == ErrorLevel.RAISE and self.errors: 1479 raise ParseError( 1480 concat_messages(self.errors, self.max_errors), 1481 errors=merge_errors(self.errors), 1482 )
Logs or raises any found errors, depending on the chosen error level setting.
1484 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1485 """ 1486 Appends an error in the list of recorded errors or raises it, depending on the chosen 1487 error level setting. 1488 """ 1489 token = token or self._curr or self._prev or Token.string("") 1490 start = token.start 1491 end = token.end + 1 1492 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1493 highlight = self.sql[start:end] 1494 end_context = self.sql[end : end + self.error_message_context] 1495 1496 error = ParseError.new( 1497 f"{message}. Line {token.line}, Col: {token.col}.\n" 1498 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1499 description=message, 1500 line=token.line, 1501 col=token.col, 1502 start_context=start_context, 1503 highlight=highlight, 1504 end_context=end_context, 1505 ) 1506 1507 if self.error_level == ErrorLevel.IMMEDIATE: 1508 raise error 1509 1510 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1512 def expression( 1513 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1514 ) -> E: 1515 """ 1516 Creates a new, validated Expression. 1517 1518 Args: 1519 exp_class: The expression class to instantiate. 1520 comments: An optional list of comments to attach to the expression. 1521 kwargs: The arguments to set for the expression along with their respective values. 1522 1523 Returns: 1524 The target expression. 1525 """ 1526 instance = exp_class(**kwargs) 1527 instance.add_comments(comments) if comments else self._add_comments(instance) 1528 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1535 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1536 """ 1537 Validates an Expression, making sure that all its mandatory arguments are set. 1538 1539 Args: 1540 expression: The expression to validate. 1541 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1542 1543 Returns: 1544 The validated expression. 1545 """ 1546 if self.error_level != ErrorLevel.IGNORE: 1547 for error_message in expression.error_messages(args): 1548 self.raise_error(error_message) 1549 1550 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.