# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120def build_array_constructor( 121 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 122) -> exp.Expression: 123 array_exp = exp_class(expressions=args) 124 125 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 126 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 127 128 return array_exp 129 130 131def build_convert_timezone( 132 args: t.List, default_source_tz: t.Optional[str] = None 133) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 134 if len(args) == 2: 135 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 136 return exp.ConvertTimezone( 137 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 138 ) 139 140 return exp.ConvertTimezone.from_arg_list(args) 141 142 143class _Parser(type): 144 def __new__(cls, clsname, bases, attrs): 145 klass = super().__new__(cls, clsname, bases, attrs) 146 147 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 148 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 149 150 return klass 151 152 153class Parser(metaclass=_Parser): 154 """ 155 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 156 157 Args: 158 error_level: The desired error level. 159 Default: ErrorLevel.IMMEDIATE 160 error_message_context: The amount of context to capture from a query string when displaying 161 the error message (in number of characters). 
162 Default: 100 163 max_errors: Maximum number of error messages to include in a raised ParseError. 164 This is only relevant if error_level is ErrorLevel.RAISE. 165 Default: 3 166 """ 167 168 FUNCTIONS: t.Dict[str, t.Callable] = { 169 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 170 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 171 "CONCAT": lambda args, dialect: exp.Concat( 172 expressions=args, 173 safe=not dialect.STRICT_STRING_CONCAT, 174 coalesce=dialect.CONCAT_COALESCE, 175 ), 176 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 177 expressions=args, 178 safe=not dialect.STRICT_STRING_CONCAT, 179 coalesce=dialect.CONCAT_COALESCE, 180 ), 181 "CONVERT_TIMEZONE": build_convert_timezone, 182 "DATE_TO_DATE_STR": lambda args: exp.Cast( 183 this=seq_get(args, 0), 184 to=exp.DataType(this=exp.DataType.Type.TEXT), 185 ), 186 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 187 start=seq_get(args, 0), 188 end=seq_get(args, 1), 189 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 190 ), 191 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 192 "HEX": build_hex, 193 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 194 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 195 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 196 "LIKE": build_like, 197 "LOG": build_logarithm, 198 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 199 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 200 "LOWER": build_lower, 201 "LPAD": lambda args: build_pad(args), 202 "LEFTPAD": lambda args: build_pad(args), 203 "MOD": build_mod, 204 "RPAD": lambda args: build_pad(args, is_left=False), 205 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 206 "SCOPE_RESOLUTION": lambda args: 
exp.ScopeResolution(expression=seq_get(args, 0)) 207 if len(args) != 2 208 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 209 "TIME_TO_TIME_STR": lambda args: exp.Cast( 210 this=seq_get(args, 0), 211 to=exp.DataType(this=exp.DataType.Type.TEXT), 212 ), 213 "TO_HEX": build_hex, 214 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 215 this=exp.Cast( 216 this=seq_get(args, 0), 217 to=exp.DataType(this=exp.DataType.Type.TEXT), 218 ), 219 start=exp.Literal.number(1), 220 length=exp.Literal.number(10), 221 ), 222 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 223 "UPPER": build_upper, 224 "VAR_MAP": build_var_map, 225 "COALESCE": lambda args: exp.Coalesce(this=seq_get(args, 0), expressions=args[1:]), 226 } 227 228 NO_PAREN_FUNCTIONS = { 229 TokenType.CURRENT_DATE: exp.CurrentDate, 230 TokenType.CURRENT_DATETIME: exp.CurrentDate, 231 TokenType.CURRENT_TIME: exp.CurrentTime, 232 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 233 TokenType.CURRENT_USER: exp.CurrentUser, 234 } 235 236 STRUCT_TYPE_TOKENS = { 237 TokenType.NESTED, 238 TokenType.OBJECT, 239 TokenType.STRUCT, 240 } 241 242 NESTED_TYPE_TOKENS = { 243 TokenType.ARRAY, 244 TokenType.LIST, 245 TokenType.LOWCARDINALITY, 246 TokenType.MAP, 247 TokenType.NULLABLE, 248 *STRUCT_TYPE_TOKENS, 249 } 250 251 ENUM_TYPE_TOKENS = { 252 TokenType.ENUM, 253 TokenType.ENUM8, 254 TokenType.ENUM16, 255 } 256 257 AGGREGATE_TYPE_TOKENS = { 258 TokenType.AGGREGATEFUNCTION, 259 TokenType.SIMPLEAGGREGATEFUNCTION, 260 } 261 262 TYPE_TOKENS = { 263 TokenType.BIT, 264 TokenType.BOOLEAN, 265 TokenType.TINYINT, 266 TokenType.UTINYINT, 267 TokenType.SMALLINT, 268 TokenType.USMALLINT, 269 TokenType.INT, 270 TokenType.UINT, 271 TokenType.BIGINT, 272 TokenType.UBIGINT, 273 TokenType.INT128, 274 TokenType.UINT128, 275 TokenType.INT256, 276 TokenType.UINT256, 277 TokenType.MEDIUMINT, 278 TokenType.UMEDIUMINT, 279 TokenType.FIXEDSTRING, 280 TokenType.FLOAT, 281 TokenType.DOUBLE, 
282 TokenType.CHAR, 283 TokenType.NCHAR, 284 TokenType.VARCHAR, 285 TokenType.NVARCHAR, 286 TokenType.BPCHAR, 287 TokenType.TEXT, 288 TokenType.MEDIUMTEXT, 289 TokenType.LONGTEXT, 290 TokenType.MEDIUMBLOB, 291 TokenType.LONGBLOB, 292 TokenType.BINARY, 293 TokenType.VARBINARY, 294 TokenType.JSON, 295 TokenType.JSONB, 296 TokenType.INTERVAL, 297 TokenType.TINYBLOB, 298 TokenType.TINYTEXT, 299 TokenType.TIME, 300 TokenType.TIMETZ, 301 TokenType.TIMESTAMP, 302 TokenType.TIMESTAMP_S, 303 TokenType.TIMESTAMP_MS, 304 TokenType.TIMESTAMP_NS, 305 TokenType.TIMESTAMPTZ, 306 TokenType.TIMESTAMPLTZ, 307 TokenType.TIMESTAMPNTZ, 308 TokenType.DATETIME, 309 TokenType.DATETIME64, 310 TokenType.DATE, 311 TokenType.DATE32, 312 TokenType.INT4RANGE, 313 TokenType.INT4MULTIRANGE, 314 TokenType.INT8RANGE, 315 TokenType.INT8MULTIRANGE, 316 TokenType.NUMRANGE, 317 TokenType.NUMMULTIRANGE, 318 TokenType.TSRANGE, 319 TokenType.TSMULTIRANGE, 320 TokenType.TSTZRANGE, 321 TokenType.TSTZMULTIRANGE, 322 TokenType.DATERANGE, 323 TokenType.DATEMULTIRANGE, 324 TokenType.DECIMAL, 325 TokenType.UDECIMAL, 326 TokenType.BIGDECIMAL, 327 TokenType.UUID, 328 TokenType.GEOGRAPHY, 329 TokenType.GEOMETRY, 330 TokenType.HLLSKETCH, 331 TokenType.HSTORE, 332 TokenType.PSEUDO_TYPE, 333 TokenType.SUPER, 334 TokenType.SERIAL, 335 TokenType.SMALLSERIAL, 336 TokenType.BIGSERIAL, 337 TokenType.XML, 338 TokenType.YEAR, 339 TokenType.UNIQUEIDENTIFIER, 340 TokenType.USERDEFINED, 341 TokenType.MONEY, 342 TokenType.SMALLMONEY, 343 TokenType.ROWVERSION, 344 TokenType.IMAGE, 345 TokenType.VARIANT, 346 TokenType.VECTOR, 347 TokenType.OBJECT, 348 TokenType.OBJECT_IDENTIFIER, 349 TokenType.INET, 350 TokenType.IPADDRESS, 351 TokenType.IPPREFIX, 352 TokenType.IPV4, 353 TokenType.IPV6, 354 TokenType.UNKNOWN, 355 TokenType.NULL, 356 TokenType.NAME, 357 TokenType.TDIGEST, 358 *ENUM_TYPE_TOKENS, 359 *NESTED_TYPE_TOKENS, 360 *AGGREGATE_TYPE_TOKENS, 361 } 362 363 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 364 TokenType.BIGINT: 
TokenType.UBIGINT, 365 TokenType.INT: TokenType.UINT, 366 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 367 TokenType.SMALLINT: TokenType.USMALLINT, 368 TokenType.TINYINT: TokenType.UTINYINT, 369 TokenType.DECIMAL: TokenType.UDECIMAL, 370 } 371 372 SUBQUERY_PREDICATES = { 373 TokenType.ANY: exp.Any, 374 TokenType.ALL: exp.All, 375 TokenType.EXISTS: exp.Exists, 376 TokenType.SOME: exp.Any, 377 } 378 379 RESERVED_TOKENS = { 380 *Tokenizer.SINGLE_TOKENS.values(), 381 TokenType.SELECT, 382 } - {TokenType.IDENTIFIER} 383 384 DB_CREATABLES = { 385 TokenType.DATABASE, 386 TokenType.DICTIONARY, 387 TokenType.MODEL, 388 TokenType.SCHEMA, 389 TokenType.SEQUENCE, 390 TokenType.STORAGE_INTEGRATION, 391 TokenType.TABLE, 392 TokenType.TAG, 393 TokenType.VIEW, 394 TokenType.WAREHOUSE, 395 TokenType.STREAMLIT, 396 } 397 398 CREATABLES = { 399 TokenType.COLUMN, 400 TokenType.CONSTRAINT, 401 TokenType.FOREIGN_KEY, 402 TokenType.FUNCTION, 403 TokenType.INDEX, 404 TokenType.PROCEDURE, 405 *DB_CREATABLES, 406 } 407 408 ALTERABLES = { 409 TokenType.TABLE, 410 TokenType.VIEW, 411 } 412 413 # Tokens that can represent identifiers 414 ID_VAR_TOKENS = { 415 TokenType.ALL, 416 TokenType.VAR, 417 TokenType.ANTI, 418 TokenType.APPLY, 419 TokenType.ASC, 420 TokenType.ASOF, 421 TokenType.AUTO_INCREMENT, 422 TokenType.BEGIN, 423 TokenType.BPCHAR, 424 TokenType.CACHE, 425 TokenType.CASE, 426 TokenType.COLLATE, 427 TokenType.COMMAND, 428 TokenType.COMMENT, 429 TokenType.COMMIT, 430 TokenType.CONSTRAINT, 431 TokenType.COPY, 432 TokenType.CUBE, 433 TokenType.DEFAULT, 434 TokenType.DELETE, 435 TokenType.DESC, 436 TokenType.DESCRIBE, 437 TokenType.DICTIONARY, 438 TokenType.DIV, 439 TokenType.END, 440 TokenType.EXECUTE, 441 TokenType.ESCAPE, 442 TokenType.FALSE, 443 TokenType.FIRST, 444 TokenType.FILTER, 445 TokenType.FINAL, 446 TokenType.FORMAT, 447 TokenType.FULL, 448 TokenType.IDENTIFIER, 449 TokenType.IS, 450 TokenType.ISNULL, 451 TokenType.INTERVAL, 452 TokenType.KEEP, 453 TokenType.KILL, 454 
TokenType.LEFT, 455 TokenType.LOAD, 456 TokenType.MERGE, 457 TokenType.NATURAL, 458 TokenType.NEXT, 459 TokenType.OFFSET, 460 TokenType.OPERATOR, 461 TokenType.ORDINALITY, 462 TokenType.OVERLAPS, 463 TokenType.OVERWRITE, 464 TokenType.PARTITION, 465 TokenType.PERCENT, 466 TokenType.PIVOT, 467 TokenType.PRAGMA, 468 TokenType.RANGE, 469 TokenType.RECURSIVE, 470 TokenType.REFERENCES, 471 TokenType.REFRESH, 472 TokenType.RENAME, 473 TokenType.REPLACE, 474 TokenType.RIGHT, 475 TokenType.ROLLUP, 476 TokenType.ROW, 477 TokenType.ROWS, 478 TokenType.SEMI, 479 TokenType.SET, 480 TokenType.SETTINGS, 481 TokenType.SHOW, 482 TokenType.TEMPORARY, 483 TokenType.TOP, 484 TokenType.TRUE, 485 TokenType.TRUNCATE, 486 TokenType.UNIQUE, 487 TokenType.UNNEST, 488 TokenType.UNPIVOT, 489 TokenType.UPDATE, 490 TokenType.USE, 491 TokenType.VOLATILE, 492 TokenType.WINDOW, 493 *CREATABLES, 494 *SUBQUERY_PREDICATES, 495 *TYPE_TOKENS, 496 *NO_PAREN_FUNCTIONS, 497 } 498 499 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 500 501 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 502 TokenType.ANTI, 503 TokenType.APPLY, 504 TokenType.ASOF, 505 TokenType.FULL, 506 TokenType.LEFT, 507 TokenType.LOCK, 508 TokenType.NATURAL, 509 TokenType.OFFSET, 510 TokenType.RIGHT, 511 TokenType.SEMI, 512 TokenType.WINDOW, 513 } 514 515 ALIAS_TOKENS = ID_VAR_TOKENS 516 517 ARRAY_CONSTRUCTORS = { 518 "ARRAY": exp.Array, 519 "LIST": exp.List, 520 } 521 522 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 523 524 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 525 526 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 527 528 FUNC_TOKENS = { 529 TokenType.COLLATE, 530 TokenType.COMMAND, 531 TokenType.CURRENT_DATE, 532 TokenType.CURRENT_DATETIME, 533 TokenType.CURRENT_TIMESTAMP, 534 TokenType.CURRENT_TIME, 535 TokenType.CURRENT_USER, 536 TokenType.FILTER, 537 TokenType.FIRST, 538 TokenType.FORMAT, 539 TokenType.GLOB, 540 TokenType.IDENTIFIER, 541 TokenType.INDEX, 542 TokenType.ISNULL, 543 TokenType.ILIKE, 544 
TokenType.INSERT, 545 TokenType.LIKE, 546 TokenType.MERGE, 547 TokenType.OFFSET, 548 TokenType.PRIMARY_KEY, 549 TokenType.RANGE, 550 TokenType.REPLACE, 551 TokenType.RLIKE, 552 TokenType.ROW, 553 TokenType.UNNEST, 554 TokenType.VAR, 555 TokenType.LEFT, 556 TokenType.RIGHT, 557 TokenType.SEQUENCE, 558 TokenType.DATE, 559 TokenType.DATETIME, 560 TokenType.TABLE, 561 TokenType.TIMESTAMP, 562 TokenType.TIMESTAMPTZ, 563 TokenType.TRUNCATE, 564 TokenType.WINDOW, 565 TokenType.XOR, 566 *TYPE_TOKENS, 567 *SUBQUERY_PREDICATES, 568 } 569 570 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 571 TokenType.AND: exp.And, 572 } 573 574 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 575 TokenType.COLON_EQ: exp.PropertyEQ, 576 } 577 578 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 579 TokenType.OR: exp.Or, 580 } 581 582 EQUALITY = { 583 TokenType.EQ: exp.EQ, 584 TokenType.NEQ: exp.NEQ, 585 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 586 } 587 588 COMPARISON = { 589 TokenType.GT: exp.GT, 590 TokenType.GTE: exp.GTE, 591 TokenType.LT: exp.LT, 592 TokenType.LTE: exp.LTE, 593 } 594 595 BITWISE = { 596 TokenType.AMP: exp.BitwiseAnd, 597 TokenType.CARET: exp.BitwiseXor, 598 TokenType.PIPE: exp.BitwiseOr, 599 } 600 601 TERM = { 602 TokenType.DASH: exp.Sub, 603 TokenType.PLUS: exp.Add, 604 TokenType.MOD: exp.Mod, 605 TokenType.COLLATE: exp.Collate, 606 } 607 608 FACTOR = { 609 TokenType.DIV: exp.IntDiv, 610 TokenType.LR_ARROW: exp.Distance, 611 TokenType.SLASH: exp.Div, 612 TokenType.STAR: exp.Mul, 613 } 614 615 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 616 617 TIMES = { 618 TokenType.TIME, 619 TokenType.TIMETZ, 620 } 621 622 TIMESTAMPS = { 623 TokenType.TIMESTAMP, 624 TokenType.TIMESTAMPTZ, 625 TokenType.TIMESTAMPLTZ, 626 *TIMES, 627 } 628 629 SET_OPERATIONS = { 630 TokenType.UNION, 631 TokenType.INTERSECT, 632 TokenType.EXCEPT, 633 } 634 635 JOIN_METHODS = { 636 TokenType.ASOF, 637 TokenType.NATURAL, 638 TokenType.POSITIONAL, 639 } 640 641 
JOIN_SIDES = { 642 TokenType.LEFT, 643 TokenType.RIGHT, 644 TokenType.FULL, 645 } 646 647 JOIN_KINDS = { 648 TokenType.ANTI, 649 TokenType.CROSS, 650 TokenType.INNER, 651 TokenType.OUTER, 652 TokenType.SEMI, 653 TokenType.STRAIGHT_JOIN, 654 } 655 656 JOIN_HINTS: t.Set[str] = set() 657 658 LAMBDAS = { 659 TokenType.ARROW: lambda self, expressions: self.expression( 660 exp.Lambda, 661 this=self._replace_lambda( 662 self._parse_assignment(), 663 expressions, 664 ), 665 expressions=expressions, 666 ), 667 TokenType.FARROW: lambda self, expressions: self.expression( 668 exp.Kwarg, 669 this=exp.var(expressions[0].name), 670 expression=self._parse_assignment(), 671 ), 672 } 673 674 COLUMN_OPERATORS = { 675 TokenType.DOT: None, 676 TokenType.DCOLON: lambda self, this, to: self.expression( 677 exp.Cast if self.STRICT_CAST else exp.TryCast, 678 this=this, 679 to=to, 680 ), 681 TokenType.ARROW: lambda self, this, path: self.expression( 682 exp.JSONExtract, 683 this=this, 684 expression=self.dialect.to_json_path(path), 685 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 686 ), 687 TokenType.DARROW: lambda self, this, path: self.expression( 688 exp.JSONExtractScalar, 689 this=this, 690 expression=self.dialect.to_json_path(path), 691 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 692 ), 693 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 694 exp.JSONBExtract, 695 this=this, 696 expression=path, 697 ), 698 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 699 exp.JSONBExtractScalar, 700 this=this, 701 expression=path, 702 ), 703 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 704 exp.JSONBContains, 705 this=this, 706 expression=key, 707 ), 708 } 709 710 EXPRESSION_PARSERS = { 711 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 712 exp.Column: lambda self: self._parse_column(), 713 exp.Condition: lambda self: self._parse_assignment(), 714 exp.DataType: lambda self: 
self._parse_types(allow_identifiers=False, schema=True), 715 exp.Expression: lambda self: self._parse_expression(), 716 exp.From: lambda self: self._parse_from(joins=True), 717 exp.Group: lambda self: self._parse_group(), 718 exp.Having: lambda self: self._parse_having(), 719 exp.Identifier: lambda self: self._parse_id_var(), 720 exp.Join: lambda self: self._parse_join(), 721 exp.Lambda: lambda self: self._parse_lambda(), 722 exp.Lateral: lambda self: self._parse_lateral(), 723 exp.Limit: lambda self: self._parse_limit(), 724 exp.Offset: lambda self: self._parse_offset(), 725 exp.Order: lambda self: self._parse_order(), 726 exp.Ordered: lambda self: self._parse_ordered(), 727 exp.Properties: lambda self: self._parse_properties(), 728 exp.Qualify: lambda self: self._parse_qualify(), 729 exp.Returning: lambda self: self._parse_returning(), 730 exp.Select: lambda self: self._parse_select(), 731 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 732 exp.Table: lambda self: self._parse_table_parts(), 733 exp.TableAlias: lambda self: self._parse_table_alias(), 734 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 735 exp.Where: lambda self: self._parse_where(), 736 exp.Window: lambda self: self._parse_named_window(), 737 exp.With: lambda self: self._parse_with(), 738 "JOIN_TYPE": lambda self: self._parse_join_parts(), 739 } 740 741 STATEMENT_PARSERS = { 742 TokenType.ALTER: lambda self: self._parse_alter(), 743 TokenType.BEGIN: lambda self: self._parse_transaction(), 744 TokenType.CACHE: lambda self: self._parse_cache(), 745 TokenType.COMMENT: lambda self: self._parse_comment(), 746 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 747 TokenType.COPY: lambda self: self._parse_copy(), 748 TokenType.CREATE: lambda self: self._parse_create(), 749 TokenType.DELETE: lambda self: self._parse_delete(), 750 TokenType.DESC: lambda self: self._parse_describe(), 751 TokenType.DESCRIBE: lambda self: self._parse_describe(), 752 
TokenType.DROP: lambda self: self._parse_drop(), 753 TokenType.INSERT: lambda self: self._parse_insert(), 754 TokenType.KILL: lambda self: self._parse_kill(), 755 TokenType.LOAD: lambda self: self._parse_load(), 756 TokenType.MERGE: lambda self: self._parse_merge(), 757 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 758 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 759 TokenType.REFRESH: lambda self: self._parse_refresh(), 760 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 761 TokenType.SET: lambda self: self._parse_set(), 762 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 763 TokenType.UNCACHE: lambda self: self._parse_uncache(), 764 TokenType.UPDATE: lambda self: self._parse_update(), 765 TokenType.USE: lambda self: self.expression( 766 exp.Use, 767 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 768 this=self._parse_table(schema=False), 769 ), 770 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 771 } 772 773 UNARY_PARSERS = { 774 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 775 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 776 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 777 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 778 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 779 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 780 } 781 782 STRING_PARSERS = { 783 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 784 exp.RawString, this=token.text 785 ), 786 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 787 exp.National, this=token.text 788 ), 789 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 790 TokenType.STRING: lambda self, 
token: self.expression( 791 exp.Literal, this=token.text, is_string=True 792 ), 793 TokenType.UNICODE_STRING: lambda self, token: self.expression( 794 exp.UnicodeString, 795 this=token.text, 796 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 797 ), 798 } 799 800 NUMERIC_PARSERS = { 801 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 802 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 803 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 804 TokenType.NUMBER: lambda self, token: self.expression( 805 exp.Literal, this=token.text, is_string=False 806 ), 807 } 808 809 PRIMARY_PARSERS = { 810 **STRING_PARSERS, 811 **NUMERIC_PARSERS, 812 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 813 TokenType.NULL: lambda self, _: self.expression(exp.Null), 814 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 815 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 816 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 817 TokenType.STAR: lambda self, _: self.expression( 818 exp.Star, 819 **{ 820 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 821 "replace": self._parse_star_op("REPLACE"), 822 "rename": self._parse_star_op("RENAME"), 823 }, 824 ), 825 } 826 827 PLACEHOLDER_PARSERS = { 828 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 829 TokenType.PARAMETER: lambda self: self._parse_parameter(), 830 TokenType.COLON: lambda self: ( 831 self.expression(exp.Placeholder, this=self._prev.text) 832 if self._match_set(self.ID_VAR_TOKENS) 833 else None 834 ), 835 } 836 837 RANGE_PARSERS = { 838 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 839 TokenType.GLOB: binary_range_parser(exp.Glob), 840 TokenType.ILIKE: binary_range_parser(exp.ILike), 841 TokenType.IN: lambda self, this: self._parse_in(this), 842 
TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 843 TokenType.IS: lambda self, this: self._parse_is(this), 844 TokenType.LIKE: binary_range_parser(exp.Like), 845 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 846 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 847 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 848 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 849 } 850 851 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 852 "ALLOWED_VALUES": lambda self: self.expression( 853 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 854 ), 855 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 856 "AUTO": lambda self: self._parse_auto_property(), 857 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 858 "BACKUP": lambda self: self.expression( 859 exp.BackupProperty, this=self._parse_var(any_token=True) 860 ), 861 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 862 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 863 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 864 "CHECKSUM": lambda self: self._parse_checksum(), 865 "CLUSTER BY": lambda self: self._parse_cluster(), 866 "CLUSTERED": lambda self: self._parse_clustered_by(), 867 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 868 exp.CollateProperty, **kwargs 869 ), 870 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 871 "CONTAINS": lambda self: self._parse_contains_property(), 872 "COPY": lambda self: self._parse_copy_property(), 873 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 874 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 875 "DEFINER": lambda self: self._parse_definer(), 876 "DETERMINISTIC": lambda self: self.expression( 877 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 878 ), 879 
"DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 880 "DISTKEY": lambda self: self._parse_distkey(), 881 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 882 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 883 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 884 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 885 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 886 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 887 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 888 "FREESPACE": lambda self: self._parse_freespace(), 889 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 890 "HEAP": lambda self: self.expression(exp.HeapProperty), 891 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 892 "IMMUTABLE": lambda self: self.expression( 893 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 894 ), 895 "INHERITS": lambda self: self.expression( 896 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 897 ), 898 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 899 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 900 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 901 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 902 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 903 "LIKE": lambda self: self._parse_create_like(), 904 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 905 "LOCK": lambda self: self._parse_locking(), 906 "LOCKING": lambda self: self._parse_locking(), 907 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 908 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 909 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 910 
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> parser for column/table constraints (e.g. CHECK, DEFAULT, PRIMARY KEY).
    # Each value is called with the parser after the keyword has been matched.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> is a column constraint; a bare ON <id> is a property
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Keyword -> parser for ALTER TABLE actions (ALTER TABLE t <ACTION> ...)
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    # Keyword -> parser for the nested ALTER in "ALTER TABLE t ALTER <KEYWORD> ..." (Redshift-style)
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint keywords that may appear in a schema without a preceding CONSTRAINT <name>
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Function-like keywords that are parsed without a parenthesized argument list
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that can never be the name of a function call
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS name))
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Expression shapes treated as key/value pairs (e.g. inside JSON_OBJECT or maps)
    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Function name -> dedicated parser for functions with non-standard argument syntax
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
1079 "JSON_TABLE": lambda self: self._parse_json_table(), 1080 "MATCH": lambda self: self._parse_match_against(), 1081 "OPENJSON": lambda self: self._parse_open_json(), 1082 "POSITION": lambda self: self._parse_position(), 1083 "PREDICT": lambda self: self._parse_predict(), 1084 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1085 "STRING_AGG": lambda self: self._parse_string_agg(), 1086 "SUBSTRING": lambda self: self._parse_substring(), 1087 "TRIM": lambda self: self._parse_trim(), 1088 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1089 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1090 } 1091 1092 QUERY_MODIFIER_PARSERS = { 1093 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1094 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1095 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1096 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1097 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1098 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1099 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1100 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1101 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1102 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1103 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1104 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1105 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1106 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1107 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1108 TokenType.CLUSTER_BY: lambda self: ( 1109 "cluster", 1110 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1111 ), 1112 TokenType.DISTRIBUTE_BY: lambda self: ( 1113 "distribute", 1114 
self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1115 ), 1116 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1117 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1118 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1119 } 1120 1121 SET_PARSERS = { 1122 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1123 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1124 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1125 "TRANSACTION": lambda self: self._parse_set_transaction(), 1126 } 1127 1128 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1129 1130 TYPE_LITERAL_PARSERS = { 1131 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1132 } 1133 1134 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1135 1136 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1137 1138 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1139 1140 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1141 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1142 "ISOLATION": ( 1143 ("LEVEL", "REPEATABLE", "READ"), 1144 ("LEVEL", "READ", "COMMITTED"), 1145 ("LEVEL", "READ", "UNCOMITTED"), 1146 ("LEVEL", "SERIALIZABLE"), 1147 ), 1148 "READ": ("WRITE", "ONLY"), 1149 } 1150 1151 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1152 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1153 ) 1154 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1155 1156 CREATE_SEQUENCE: OPTIONS_TYPE = { 1157 "SCALE": ("EXTEND", "NOEXTEND"), 1158 "SHARD": ("EXTEND", "NOEXTEND"), 1159 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1160 **dict.fromkeys( 1161 ( 1162 "SESSION", 1163 "GLOBAL", 1164 "KEEP", 1165 "NOKEEP", 1166 "ORDER", 1167 "NOORDER", 1168 "NOCACHE", 1169 "CYCLE", 1170 "NOCYCLE", 1171 "NOMINVALUE", 1172 
"NOMAXVALUE", 1173 "NOSCALE", 1174 "NOSHARD", 1175 ), 1176 tuple(), 1177 ), 1178 } 1179 1180 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1181 1182 USABLES: OPTIONS_TYPE = dict.fromkeys( 1183 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1184 ) 1185 1186 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1187 1188 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1189 "TYPE": ("EVOLUTION",), 1190 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1191 } 1192 1193 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1194 "NOT": ("ENFORCED",), 1195 "MATCH": ( 1196 "FULL", 1197 "PARTIAL", 1198 "SIMPLE", 1199 ), 1200 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1201 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1202 } 1203 1204 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1205 1206 CLONE_KEYWORDS = {"CLONE", "COPY"} 1207 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1208 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1209 1210 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1211 1212 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1213 1214 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1215 1216 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1217 1218 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1219 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1220 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1221 1222 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1223 1224 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1225 1226 ADD_CONSTRAINT_TOKENS = { 1227 TokenType.CONSTRAINT, 1228 TokenType.FOREIGN_KEY, 1229 TokenType.INDEX, 1230 TokenType.KEY, 1231 TokenType.PRIMARY_KEY, 1232 TokenType.UNIQUE, 1233 } 1234 1235 DISTINCT_TOKENS = {TokenType.DISTINCT} 1236 1237 NULL_TOKENS = {TokenType.NULL} 1238 1239 
    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    # Whether CAST should raise on incompatible values (vs producing NULL like TRY_CAST)
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How to react to parse errors; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of surrounding SQL shown in error messages.
            max_errors: Maximum number of error messages concatenated into a raised ParseError.
            dialect: The dialect (name, class or instance) to parse with.
        """
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all per-parse state (token stream, cursor and collected errors)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the token stream into semicolon-delimited chunks and applies
        `parse_method` to each chunk, returning one tree per statement.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Keep a semicolon that carries comments so they aren't lost
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Attaches the pending comments (from the previous token) to the expression, once
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens (inclusive)
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the source (no whitespace).
        # NOTE(review): short-circuits may yield None/Token rather than a strict bool;
        # callers only use the result in boolean context.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward (or backward, for negative deltas via _retreat)
        # and refreshes the _curr/_next/_prev token views
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to an earlier index (used for backtracking)
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback: wraps unsupported syntax in an opaque Command node
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT ON <kind> <target> IS <string>; falls back to a Command
        # when the target kind is not a known creatable
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the statement keyword,
        # then fall back to command parsing, then to a bare expression/SELECT
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP <kind> [IF EXISTS] <name> [...options]
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when fully matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or
            self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # T-SQL columnstore index variants: CLUSTERED/NONCLUSTERED COLUMNSTORE
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Snowflake/Databricks CLONE / COPY of an existing table
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Parses CREATE SEQUENCE options (INCREMENT BY, MINVALUE, CACHE, ...);
        # returns None if no option was consumed
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that precede the property name; passed through
        # to the property parser as keyword arguments when set
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser doesn't accept the collected modifiers
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # Parses a parenthesized, comma-separated property list
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parses a single property: keyword-dispatched, special-cased, or key = value
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED AS [INPUTFORMAT <s> OUTPUTFORMAT <s> | <format>] (Hive-style)
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Converts an unquoted identifier into a Var, leaving quoted identifiers intact
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) ->
    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into an exp.Properties node.

        Args:
            before: when truthy, use the Teradata-style "before" property parser.

        Returns None when no property could be parsed.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse [NO] FALLBACK [PROTECTION] (Teradata)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_settings_property(self) -> exp.SettingsProperty:
        """Parse a SETTINGS key=value list."""
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: table property vs. function stability marker.

        Looks back two tokens; if the preceding token is one of
        PRE_VOLATILE_TOKENS (e.g. part of a CREATE TABLE), it is a table
        property, otherwise a function stability attribute.
        """
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON|OFF [( ... options ... )]."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON|OFF [( FILTER_COLUMN = ..., RETENTION_PERIOD = ... )]."""
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly given
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH, dispatching on the next token(s)."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)  # the '@' separator
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a [NO] LOG property; the keyword itself was already consumed."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property from modifiers collected by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (ordered)] INTO <n> BUCKETS (Hive)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; rewinds and returns None if GRANTS is absent."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <number> [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either = <number> [PERCENT] or NO/DEFAULT form."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE = <size> [BYTES|KBYTES|KILOBYTES] (Teradata)."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [target] (Teradata).

        Rewinds and returns None when ISOLATED LOADING does not follow.
        """
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, target, FOR/IN, lock type, OVERRIDE (Teradata)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY expressions; returns [] if the keyword is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not column references
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>} (Postgres)."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY, accepting a schema or a bracketed field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL tail of CONTAINS SQL."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL DATA tail of MODIFIES SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows NO: PRIMARY INDEX or SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT PRESERVE/DELETE ROWS, else ON <schema>."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL DATA tail of READS SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<identifier>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<id vars>) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts> (BigQuery models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )
    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE/DESC statement with optional kind, style and partition."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot means the "style" token was really the first table part;
            # rewind so it is re-parsed as part of the table name.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (including INSERT OVERWRITE DIRECTORY).

        NOTE: the keyword arguments below consume tokens in order; their
        evaluation order is load-bearing.
        """
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION|QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = b, ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause with an optional INTO target."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of ROW FORMAT (ROW was already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...); rewinds if the keyword is absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... (Hive); anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (k = v, ...); returns None if the keyword is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        Handles CTE prefixes, leading FROM (DuckDB), parenthesized subqueries,
        VALUES, SUMMARIZE, DESCRIBE and STREAM forms; returns None if nothing
        query-like follows.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
skip_with_token: bool = False) -> t.Optional[exp.With]: 2896 if not skip_with_token and not self._match(TokenType.WITH): 2897 return None 2898 2899 comments = self._prev_comments 2900 recursive = self._match(TokenType.RECURSIVE) 2901 2902 expressions = [] 2903 while True: 2904 expressions.append(self._parse_cte()) 2905 2906 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2907 break 2908 else: 2909 self._match(TokenType.WITH) 2910 2911 return self.expression( 2912 exp.With, comments=comments, expressions=expressions, recursive=recursive 2913 ) 2914 2915 def _parse_cte(self) -> exp.CTE: 2916 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2917 if not alias or not alias.this: 2918 self.raise_error("Expected CTE to have alias") 2919 2920 self._match(TokenType.ALIAS) 2921 comments = self._prev_comments 2922 2923 if self._match_text_seq("NOT", "MATERIALIZED"): 2924 materialized = False 2925 elif self._match_text_seq("MATERIALIZED"): 2926 materialized = True 2927 else: 2928 materialized = None 2929 2930 return self.expression( 2931 exp.CTE, 2932 this=self._parse_wrapped(self._parse_statement), 2933 alias=alias, 2934 materialized=materialized, 2935 comments=comments, 2936 ) 2937 2938 def _parse_table_alias( 2939 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2940 ) -> t.Optional[exp.TableAlias]: 2941 any_token = self._match(TokenType.ALIAS) 2942 alias = ( 2943 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2944 or self._parse_string_as_identifier() 2945 ) 2946 2947 index = self._index 2948 if self._match(TokenType.L_PAREN): 2949 columns = self._parse_csv(self._parse_function_parameter) 2950 self._match_r_paren() if columns else self._retreat(index) 2951 else: 2952 columns = None 2953 2954 if not alias and not columns: 2955 return None 2956 2957 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2958 2959 # We bubble up comments from the Identifier to the TableAlias 
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, consuming trailing pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnest joins (e.g. `FROM t, t.arr`) into explicit UNNEST nodes."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Names referable by a correlated implicit unnest: seed with the FROM table's name.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            # Only condition-less joins whose first part references a prior table qualify.
            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/LIMIT/...) onto `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            # Consume joins and laterals that directly follow the query/table.
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)

                        if key == "limit":
                            # `LIMIT x, y` style: hoist the embedded offset into its own
                            # Offset node and move any LIMIT BY expressions onto it.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3016 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3017 key, expression = parser(self) 3018 3019 if expression: 3020 this.set(key, expression) 3021 if key == "limit": 3022 offset = expression.args.pop("offset", None) 3023 3024 if offset: 3025 offset = exp.Offset(expression=offset) 3026 this.set("offset", offset) 3027 3028 limit_by_expressions = expression.expressions 3029 expression.set("expressions", None) 3030 offset.set("expressions", limit_by_expressions) 3031 continue 3032 break 3033 3034 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3035 this = self._implicit_unnests_to_explicit(this) 3036 3037 return this 3038 3039 def _parse_hint(self) -> t.Optional[exp.Hint]: 3040 if self._match(TokenType.HINT): 3041 hints = [] 3042 for hint in iter( 3043 lambda: self._parse_csv( 3044 lambda: self._parse_function() or self._parse_var(upper=True) 3045 ), 3046 [], 3047 ): 3048 hints.extend(hint) 3049 3050 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3051 self.raise_error("Expected */ after HINT") 3052 3053 return self.expression(exp.Hint, expressions=hints) 3054 3055 return None 3056 3057 def _parse_into(self) -> t.Optional[exp.Into]: 3058 if not self._match(TokenType.INTO): 3059 return None 3060 3061 temp = self._match(TokenType.TEMPORARY) 3062 unlogged = self._match_text_seq("UNLOGGED") 3063 self._match(TokenType.TABLE) 3064 3065 return self.expression( 3066 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3067 ) 3068 3069 def _parse_from( 3070 self, joins: bool = False, skip_from_token: bool = False 3071 ) -> t.Optional[exp.From]: 3072 if not skip_from_token and not self._match(TokenType.FROM): 3073 return None 3074 3075 return self.expression( 3076 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3077 ) 3078 3079 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3080 return 
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE (row pattern matching) clause.

        Returns None when the next token is not MATCH_RECOGNIZE.
        """
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        # MEASURES <measure> [, ...]
        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match mode is kept as free-form text in a Var node.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # AFTER MATCH SKIP strategy, also captured as free-form text.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan the raw token stream for the balancing close paren, tracking nesting.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            # The pattern is preserved verbatim as the SQL text between the parens.
            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY.

        `cross_apply` is True for CROSS APPLY, False for OUTER APPLY, and
        None for plain LATERAL.
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to UNNEST, a function call, or a plain name,
            # possibly extended by a dotted chain (e.g. db.func(...)).
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW: `table [AS col, ...]` alias form.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of USING (...), unwrapping bare columns to identifiers."""
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single join clause; returns None when no join follows."""
        if self._match(TokenType.COMMA):
            # A comma in a FROM list is an implicit cross join.
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # The prefix tokens did not form a join after all — rewind and discard them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type == TokenType.CROSS)
        ):
            # Nested joins: `a JOIN b JOIN c ON ... ON ...` — parse the inner joins
            # first, then look again for this join's own ON/USING clause.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
kwargs["hint"] = hint 3275 3276 if self._match(TokenType.MATCH_CONDITION): 3277 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3278 3279 if self._match(TokenType.ON): 3280 kwargs["on"] = self._parse_assignment() 3281 elif self._match(TokenType.USING): 3282 kwargs["using"] = self._parse_using_identifiers() 3283 elif ( 3284 not (outer_apply or cross_apply) 3285 and not isinstance(kwargs["this"], exp.Unnest) 3286 and not (kind and kind.token_type == TokenType.CROSS) 3287 ): 3288 index = self._index 3289 joins: t.Optional[list] = list(self._parse_joins()) 3290 3291 if joins and self._match(TokenType.ON): 3292 kwargs["on"] = self._parse_assignment() 3293 elif joins and self._match(TokenType.USING): 3294 kwargs["using"] = self._parse_using_identifiers() 3295 else: 3296 joins = None 3297 self._retreat(index) 3298 3299 kwargs["this"].set("joins", joins if joins else None) 3300 3301 comments = [c for token in (method, side, kind) if token for c in token.comments] 3302 return self.expression(exp.Join, comments=comments, **kwargs) 3303 3304 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3305 this = self._parse_assignment() 3306 3307 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3308 return this 3309 3310 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3311 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3312 3313 return this 3314 3315 def _parse_index_params(self) -> exp.IndexParameters: 3316 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3317 3318 if self._match(TokenType.L_PAREN, advance=False): 3319 columns = self._parse_wrapped_csv(self._parse_with_operator) 3320 else: 3321 columns = None 3322 3323 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3324 partition_by = self._parse_partition_by() 3325 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3326 tablespace = 
( 3327 self._parse_var(any_token=True) 3328 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3329 else None 3330 ) 3331 where = self._parse_where() 3332 3333 on = self._parse_field() if self._match(TokenType.ON) else None 3334 3335 return self.expression( 3336 exp.IndexParameters, 3337 using=using, 3338 columns=columns, 3339 include=include, 3340 partition_by=partition_by, 3341 where=where, 3342 with_storage=with_storage, 3343 tablespace=tablespace, 3344 on=on, 3345 ) 3346 3347 def _parse_index( 3348 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3349 ) -> t.Optional[exp.Index]: 3350 if index or anonymous: 3351 unique = None 3352 primary = None 3353 amp = None 3354 3355 self._match(TokenType.ON) 3356 self._match(TokenType.TABLE) # hive 3357 table = self._parse_table_parts(schema=True) 3358 else: 3359 unique = self._match(TokenType.UNIQUE) 3360 primary = self._match_text_seq("PRIMARY") 3361 amp = self._match_text_seq("AMP") 3362 3363 if not self._match(TokenType.INDEX): 3364 return None 3365 3366 index = self._parse_id_var() 3367 table = None 3368 3369 params = self._parse_index_params() 3370 3371 return self.expression( 3372 exp.Index, 3373 this=index, 3374 table=table, 3375 unique=unique, 3376 primary=primary, 3377 amp=amp, 3378 params=params, 3379 ) 3380 3381 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3382 hints: t.List[exp.Expression] = [] 3383 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3384 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3385 hints.append( 3386 self.expression( 3387 exp.WithTableHint, 3388 expressions=self._parse_csv( 3389 lambda: self._parse_function() or self._parse_var(any_token=True) 3390 ), 3391 ) 3392 ) 3393 self._match_r_paren() 3394 else: 3395 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3396 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3397 hint = 
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name: function call, identifier, string, or placeholder."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table name into an exp.Table.

        Handles catalog.db.table chains of arbitrary depth, trailing wildcards
        (e.g. `project.dataset.*`), and trailing CHANGES / AT|BEFORE / pivot clauses.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift left: previous db becomes catalog, previous table becomes db.
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # A database reference has one fewer part — shift everything left once more.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause table factor.

        Tries, in order: LATERAL, UNNEST, VALUES, subquery, then a plain
        (possibly bracketed/qualified) table name together with its trailing
        modifiers (partition, version, alias, hints, pivots, sample, joins).
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place the alias before TABLESAMPLE, others after; exactly one
        # of these two `_parse_table_sample` calls runs, so table_sample is always bound.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (TIMESTAMP/VERSION snapshot forms)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            # Range form: FROM x TO y / BETWEEN x AND y.
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            # Default / fallback form: AS OF <expression>.
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Incomplete clause — rewind so the tokens can be re-parsed as something else.
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a CHANGES (INFORMATION => ...) clause with optional AT|BEFORE/END bounds."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional ordinality/offset and alias handling."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In these dialects the unnest alias names the column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # WITH ORDINALITY: the extra trailing column alias names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
alias.args.get("columns"): 3659 self.raise_error("Unexpected extra column alias in unnest.") 3660 3661 alias.set("columns", [alias.this]) 3662 alias.set("this", None) 3663 3664 columns = alias.args.get("columns") or [] 3665 if offset and len(expressions) < len(columns): 3666 offset = columns.pop() 3667 3668 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3669 self._match(TokenType.ALIAS) 3670 offset = self._parse_id_var( 3671 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3672 ) or exp.to_identifier("offset") 3673 3674 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3675 3676 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3677 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3678 if not is_derived and not ( 3679 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3680 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3681 ): 3682 return None 3683 3684 expressions = self._parse_csv(self._parse_value) 3685 alias = self._parse_table_alias() 3686 3687 if is_derived: 3688 self._match_r_paren() 3689 3690 return self.expression( 3691 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3692 ) 3693 3694 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3695 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3696 as_modifier and self._match_text_seq("USING", "SAMPLE") 3697 ): 3698 return None 3699 3700 bucket_numerator = None 3701 bucket_denominator = None 3702 bucket_field = None 3703 percent = None 3704 size = None 3705 seed = None 3706 3707 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3708 matched_l_paren = self._match(TokenType.L_PAREN) 3709 3710 if self.TABLESAMPLE_CSV: 3711 num = None 3712 expressions = self._parse_csv(self._parse_primary) 3713 else: 3714 expressions = None 3715 num = ( 3716 self._parse_factor() 3717 if self._match(TokenType.NUMBER, 
advance=False) 3718 else self._parse_primary() or self._parse_placeholder() 3719 ) 3720 3721 if self._match_text_seq("BUCKET"): 3722 bucket_numerator = self._parse_number() 3723 self._match_text_seq("OUT", "OF") 3724 bucket_denominator = bucket_denominator = self._parse_number() 3725 self._match(TokenType.ON) 3726 bucket_field = self._parse_field() 3727 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3728 percent = num 3729 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3730 size = num 3731 else: 3732 percent = num 3733 3734 if matched_l_paren: 3735 self._match_r_paren() 3736 3737 if self._match(TokenType.L_PAREN): 3738 method = self._parse_var(upper=True) 3739 seed = self._match(TokenType.COMMA) and self._parse_number() 3740 self._match_r_paren() 3741 elif self._match_texts(("SEED", "REPEATABLE")): 3742 seed = self._parse_wrapped(self._parse_number) 3743 3744 if not method and self.DEFAULT_SAMPLING_METHOD: 3745 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3746 3747 return self.expression( 3748 exp.TableSample, 3749 expressions=expressions, 3750 method=method, 3751 bucket_numerator=bucket_numerator, 3752 bucket_denominator=bucket_denominator, 3753 bucket_field=bucket_field, 3754 percent=percent, 3755 size=size, 3756 seed=seed, 3757 ) 3758 3759 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3760 return list(iter(self._parse_pivot, None)) or None 3761 3762 def _parse_joins(self) -> t.Iterator[exp.Join]: 3763 return iter(self._parse_join, None) 3764 3765 # https://duckdb.org/docs/sql/statements/pivot 3766 def _parse_simplified_pivot(self) -> exp.Pivot: 3767 def _parse_on() -> t.Optional[exp.Expression]: 3768 this = self._parse_bitwise() 3769 return self._parse_in(this) if self._match(TokenType.IN) else this 3770 3771 this = self._parse_table() 3772 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3773 using = self._match(TokenType.USING) and self._parse_csv( 3774 lambda: 
    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the `FOR col IN (...)` part of a PIVOT clause."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            # Each IN element may carry an alias: `expr [AS] alias`.
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            # e.g. Snowflake: PIVOT ... IN (ANY [ORDER BY ...])
            expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order())
        else:
            aliased_expressions = self._parse_csv(_parse_aliased_expression)
            expr = self.expression(exp.In, this=value, expressions=aliased_expressions)

        self._match_r_paren()
        return expr

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause; rewinds and returns None when it doesn't parse."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the output column names for PIVOT (naming is dialect-dependent).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
self.raise_error("Failed to parse PIVOT's aggregation list") 3838 3839 if not self._match(TokenType.FOR): 3840 self.raise_error("Expecting FOR") 3841 3842 field = self._parse_pivot_in() 3843 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3844 self._parse_bitwise 3845 ) 3846 3847 self._match_r_paren() 3848 3849 pivot = self.expression( 3850 exp.Pivot, 3851 expressions=expressions, 3852 field=field, 3853 unpivot=unpivot, 3854 include_nulls=include_nulls, 3855 default_on_null=default_on_null, 3856 ) 3857 3858 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3859 pivot.set("alias", self._parse_table_alias()) 3860 3861 if not unpivot: 3862 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3863 3864 columns: t.List[exp.Expression] = [] 3865 for fld in pivot.args["field"].expressions: 3866 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3867 for name in names: 3868 if self.PREFIXED_PIVOT_COLUMNS: 3869 name = f"{name}_{field_name}" if name else field_name 3870 else: 3871 name = f"{field_name}_{name}" if name else field_name 3872 3873 columns.append(exp.to_identifier(name)) 3874 3875 pivot.set("columns", columns) 3876 3877 return pivot 3878 3879 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3880 return [agg.alias for agg in aggregations] 3881 3882 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3883 if not skip_where_token and not self._match(TokenType.PREWHERE): 3884 return None 3885 3886 return self.expression( 3887 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3888 ) 3889 3890 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3891 if not skip_where_token and not self._match(TokenType.WHERE): 3892 return None 3893 3894 return self.expression( 3895 exp.Where, comments=self._prev_comments, 
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL/DISTINCT, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            # Plain grouping expressions; stop (yield None) before CUBE/ROLLUP keywords
            # so they can be handled below.
            expressions = self._parse_csv(
                lambda: None
                if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                else self._parse_assignment()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; `ROLLUP (...)` stores the column list.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # A dangling WITH that wasn't followed by ROLLUP/CUBE/TOTALS — rewind.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...), returning the list of sets or None."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple of columns or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
expressions=grouping_set) 3958 3959 return self._parse_column() 3960 3961 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3962 if not skip_having_token and not self._match(TokenType.HAVING): 3963 return None 3964 return self.expression(exp.Having, this=self._parse_assignment()) 3965 3966 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3967 if not self._match(TokenType.QUALIFY): 3968 return None 3969 return self.expression(exp.Qualify, this=self._parse_assignment()) 3970 3971 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3972 if skip_start_token: 3973 start = None 3974 elif self._match(TokenType.START_WITH): 3975 start = self._parse_assignment() 3976 else: 3977 return None 3978 3979 self._match(TokenType.CONNECT_BY) 3980 nocycle = self._match_text_seq("NOCYCLE") 3981 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3982 exp.Prior, this=self._parse_bitwise() 3983 ) 3984 connect = self._parse_assignment() 3985 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3986 3987 if not start and self._match(TokenType.START_WITH): 3988 start = self._parse_assignment() 3989 3990 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3991 3992 def _parse_name_as_expression(self) -> exp.Alias: 3993 return self.expression( 3994 exp.Alias, 3995 alias=self._parse_id_var(any_token=True), 3996 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3997 ) 3998 3999 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4000 if self._match_text_seq("INTERPOLATE"): 4001 return self._parse_wrapped_csv(self._parse_name_as_expression) 4002 return None 4003 4004 def _parse_order( 4005 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4006 ) -> t.Optional[exp.Expression]: 4007 siblings = None 4008 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4009 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4010 
    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort-like clause (e.g. SORT BY / CLUSTER BY) if `token` matches."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY item: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...].

        When nulls ordering is not explicit, `nulls_first` is derived from the
        dialect's NULL_ORDERING setting relative to the sort direction.
        """
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        # ORDER BY ALL (e.g. DuckDB) — normalize to a var so it isn't a column
        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # `(asc and False)` keeps desc as False (not None) when ASC was explicit
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Infer the dialect's implicit null placement when not spelled out
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        # ClickHouse WITH FILL [FROM ...] [TO ...] [STEP ...] [INTERPOLATE ...]
        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`) and FETCH FIRST/NEXT clauses.

        Handles the MySQL `LIMIT offset, count` form and the standard
        FETCH ... ROWS [ONLY | WITH TIES] form; returns `this` when neither
        clause is present.
        """
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may optionally wrap its count in parens: TOP (n)
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # LIMIT <offset>, <count> — the first value was actually the offset
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS], or return `this` if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the ClickHouse LIMIT ... BY expression list, if present."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE.

        Each clause may carry OF <tables>, and NOWAIT / WAIT <n> / SKIP LOCKED.
        Returns a (possibly empty) list of `exp.Lock` nodes.
        """
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, or a WAIT <n> value
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operands onto `this`, left-associatively.

        Also lifts ORDER BY/LIMIT-style modifiers off the right-most operand
        onto the set operation itself when the dialect attaches them there.
        """
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # Bare UNION (no ALL) is DISTINCT by default
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Move modifiers (e.g. ORDER BY, LIMIT) from the right operand
                # up to the set operation node
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse an expression with an optional trailing alias."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse the lowest-precedence level: assignments like `x := expr`."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    # The following four methods form the boolean/comparison precedence ladder:
    # disjunction (OR) -> conjunction (AND) -> equality -> comparison -> range

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE (via RANGE_PARSERS),
        ISNULL/NOTNULL shortcuts, NOT-negation, and IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a range predicate in NOT; overridable hook for dialects."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE.

        Retreats and returns None when IS is not followed by a recognized form,
        so the caller can treat the IS token differently.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a (sub)query or value
        list in parens/brackets, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query inside parens becomes IN (<subquery>)
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `BETWEEN low AND high` (the BETWEEN token is already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (for LIKE patterns)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to INTERVAL '<value>' <UNIT>.

        Also folds chains like INTERVAL '1' day '2' hour into a sum of
        intervals (`exp.Add`). Returns None (after retreating) when the
        tokens do not form an interval.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out for non-values and for the bare keyword IS (e.g. `interval IS NULL`)
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        # e.g. INTERVAL '1' DAY TO SECOND
        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level binary operators, plus `||`, `??` and shifts.

        `<<` and `>>` are matched as token pairs since the tokenizer emits
        them as separate LT/GT tokens.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` — null-coalescing operator
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (self.TERM) over factors."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (self.FACTOR), tagging
        division nodes with the dialect's typed/safe division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator (e.g. DIV) with no right operand was actually an
            # identifier/alias — put it back and stop
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators (self.EXPONENT) over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a data type.

        Handles INTERVAL literals, typed literals like `DATE '2020-01-01'`
        (canonicalized via TYPE_LITERAL_PARSERS or a Cast), and otherwise
        falls back to parsing a column (or identifier).
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2020-01-01'
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter of a parameterized type, e.g. the 38 in DECIMAL(38, 0)."""
        this = self._parse_type()
        if not this:
            return None

        # A bare name (e.g. VARCHAR(MAX)) is kept as an uppercased var
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type into an `exp.DataType` (or related node).

        Covers parameterized, nested (STRUCT/ARRAY/MAP), enum, aggregate,
        timestamp-with-zone, INTERVAL, LIST and array-suffix forms, plus
        user-defined types when the dialect supports them. Returns None
        (after retreating) when no type can be parsed.

        Args:
            check_func: when True, reject a bare parameterized match that is
                immediately followed by a string (it was likely a function call).
            schema: parsing a schema/column definition, where trailing bracketed
                values belong to the type (fixed-size arrays).
            allow_identifiers: allow a plain identifier to be re-tokenized as a
                type name (and UDT parsing when the dialect supports it).
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier to see if it is actually a type name
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Consume a dotted UDT name, e.g. schema.my_type
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized parameters: STRUCT(...), ENUM(...), DECIMAL(38, 0), ...
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(avg, Float64): first arg is
                # the function (or its name), the rest are types
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Could still turn out to be a function call — re-checked below
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket parameters: ARRAY<INT>, STRUCT<a INT, ...>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Inline constructor values, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        # TYPE(...) followed by a string literal means it was a typed-literal
        # function form, not a bare data type — let the caller reparse it
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        # Let the dialect rewrite the final type (e.g. add default precision)
        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name [:] type [constraints]` or a bare type."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # We got neither a type nor a name followed by a type — reparse
            # the whole field as a type
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column, applying column operators and,
        when supported, the Oracle (+) join marker."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column
    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse the base of a column reference, wrapping identifiers in `exp.Column`."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # Bare VALUES (not followed by parens) can be used as an identifier
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `expr:path.to.field` VARIANT extraction.

        Collects the colon-separated path segments (re-reading their raw SQL
        from the token stream) into a single JSONExtract, re-applying any
        `::type` casts on top since `::` binds tighter textually but should
        apply to the extracted value.
        """
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Find the token just before the `::` so the raw path text
                # excludes the cast suffix
                # NOTE(review): the generator variable `t` shadows the module's
                # `typing as t` alias within this expression only
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of a `::` cast (a data type)."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, `::` casts, brackets, ...) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # a.b.c — shift the qualifiers up one level
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals (with implicit string
        concatenation), `.5`-style numbers, and parenthesized expressions,
        subqueries or tuples."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        # Leading-dot decimals: .5 -> 0.5
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier.

        When `anonymous_func` is set, function parsing is attempted before
        primaries (so bare names followed by parens win).
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, unwrapping ODBC-style `{fn <function>}` syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # (Continuation of _parse_function_call's signature above.)
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No opening parenthesis follows: only paren-less builtins may match.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)  # consume the function name and the opening parenthesis

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builders accept the dialect; detect that via the code object.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original spelling for round-tripping.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases / equality forms) into
        PropertyEQ nodes; other expressions pass through unchanged."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key is an identifier, not a column reference.
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list.

        Returns just the name expression when no parameter list follows.
        """
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # If a literal follows, wrap it in an Introducer (presumably a charset
        # introducer like _utf8'...' — verify against callers); otherwise the
        # token is just an identifier.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as <kind>.<name>."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # Qualified form: the first part becomes the kind.
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Try to parse a lambda ((x, y) -> expr); otherwise rewind and parse an
        ordinary (possibly DISTINCT / ordered / limited) expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions), or
        return `this` unchanged if no schema follows."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column: <name> [type] ALIAS/MATERIALIZED <expr> [PERSISTED] [NOT NULL]
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Nothing column-def-like followed the name: return the bare identifier.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint |
exp.AutoIncrementColumnConstraint:
        # (Continuation of _parse_auto_increment's return annotation above.)
        # Parses AUTO_INCREMENT-style options; with both start and increment it
        # becomes a GENERATED AS IDENTITY constraint.
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            # Wrapped form: (start, increment)
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        # Expects REFRESH next; otherwise back off the token consumed before
        # this parser was invoked and report no match.
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS (a, b, ...) or COMPRESS <expr>
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (<expr>)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>)
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # NOT has presumably been consumed by the dispatcher; handles the
        # NULL / CASESPECIFIC / FOR REPLICATION suffixes.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) table constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or function-shaped ones)."""
        constraints = []
        while True:
            # Some constraints are syntactically function calls, hence the fallback.
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        # Quoted identifiers never start a constraint; otherwise dispatch on the keyword.
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [NULLS NOT DISTINCT] [(cols)] [USING idx] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, plus
        dialect-specific keywords) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON (e.g. DELETE/UPDATE) is kept verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table>(...); when match=False, REFERENCES is assumed
        to have been consumed already."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        # Referenced columns are picked up by the schema=True table parse.
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-keyword actions (e.g. CASCADE/RESTRICT) taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) ->
t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # Requires the system-time keyword; otherwise back off the token
        # consumed before dispatch and report no match.
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint or as a table-level
        key with a column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # Column-level constraint: no column list follows.
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array constructors and {...} struct literals,
        recursing so chained brackets (a[1][2]) are consumed."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            # Bare bracket literal: an array constructor.
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Plain subscript: normalize indices for the dialect's index base.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # Recover when "ELSE interval" was parsed as an Interval that
                # swallowed END as its unit expression.
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if
self._match(TokenType.L_PAREN):
            # Function form: IF(cond, true[, false])
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in some dialects.
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            # Statement form: IF cond THEN true [ELSE false] END
            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT was presumably consumed by the dispatcher; require VALUE FOR,
        # otherwise back off that token.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(expr AS type [FORMAT fmt]); `strict` selects Cast vs TryCast."""
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(x, 'format') form — presumably a string-type cast; verify dialect.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Lower CAST ... FORMAT into StrToDate/StrToTime with the format
                # normalized through the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name: treat as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style argument lists, including the
        WITHIN GROUP (ORDER BY ...) form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args =
self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: equality must also treat NULL == NULL as a match.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of pairs means a trailing default value.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] <key> <sep> [VALUE] <value> pair for JSON constructors."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap `this` when a trailing FORMAT JSON clause is present.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG argument lists, including NULL
        handling, UNIQUE KEYS, RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(expr [, path] [error/empty handling] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (col, ...) AGAINST ('expr' [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column spec of the WITH (...) clause: name, type, path, AS JSON.
            this = self._parse_field(any_token=True)
            kind =
self._parse_types() 5912 path = self._parse_string() 5913 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5914 5915 return self.expression( 5916 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5917 ) 5918 5919 expressions = None 5920 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5921 self._match_l_paren() 5922 expressions = self._parse_csv(_parse_open_json_column_def) 5923 5924 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5925 5926 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5927 args = self._parse_csv(self._parse_bitwise) 5928 5929 if self._match(TokenType.IN): 5930 return self.expression( 5931 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5932 ) 5933 5934 if haystack_first: 5935 haystack = seq_get(args, 0) 5936 needle = seq_get(args, 1) 5937 else: 5938 needle = seq_get(args, 0) 5939 haystack = seq_get(args, 1) 5940 5941 return self.expression( 5942 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5943 ) 5944 5945 def _parse_predict(self) -> exp.Predict: 5946 self._match_text_seq("MODEL") 5947 this = self._parse_table() 5948 5949 self._match(TokenType.COMMA) 5950 self._match_text_seq("TABLE") 5951 5952 return self.expression( 5953 exp.Predict, 5954 this=this, 5955 expression=self._parse_table(), 5956 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5957 ) 5958 5959 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5960 args = self._parse_csv(self._parse_table) 5961 return exp.JoinHint(this=func_name.upper(), expressions=args) 5962 5963 def _parse_substring(self) -> exp.Substring: 5964 # Postgres supports the form: substring(string [from int] [for int]) 5965 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5966 5967 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5968 5969 if self._match(TokenType.FROM): 5970 
args.append(self._parse_bitwise()) 5971 if self._match(TokenType.FOR): 5972 if len(args) == 1: 5973 args.append(exp.Literal.number(1)) 5974 args.append(self._parse_bitwise()) 5975 5976 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5977 5978 def _parse_trim(self) -> exp.Trim: 5979 # https://www.w3resource.com/sql/character-functions/trim.php 5980 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5981 5982 position = None 5983 collation = None 5984 expression = None 5985 5986 if self._match_texts(self.TRIM_TYPES): 5987 position = self._prev.text.upper() 5988 5989 this = self._parse_bitwise() 5990 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5991 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5992 expression = self._parse_bitwise() 5993 5994 if invert_order: 5995 this, expression = expression, this 5996 5997 if self._match(TokenType.COLLATE): 5998 collation = self._parse_bitwise() 5999 6000 return self.expression( 6001 exp.Trim, this=this, position=position, expression=expression, collation=collation 6002 ) 6003 6004 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6005 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6006 6007 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6008 return self._parse_window(self._parse_id_var(), alias=True) 6009 6010 def _parse_respect_or_ignore_nulls( 6011 self, this: t.Optional[exp.Expression] 6012 ) -> t.Optional[exp.Expression]: 6013 if self._match_text_seq("IGNORE", "NULLS"): 6014 return self.expression(exp.IgnoreNulls, this=this) 6015 if self._match_text_seq("RESPECT", "NULLS"): 6016 return self.expression(exp.RespectNulls, this=this) 6017 return this 6018 6019 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6020 if self._match(TokenType.HAVING): 6021 self._match_texts(("MAX", "MIN")) 6022 max = self._prev.text.upper() != "MIN" 6023 
return self.expression( 6024 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6025 ) 6026 6027 return this 6028 6029 def _parse_window( 6030 self, this: t.Optional[exp.Expression], alias: bool = False 6031 ) -> t.Optional[exp.Expression]: 6032 func = this 6033 comments = func.comments if isinstance(func, exp.Expression) else None 6034 6035 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6036 self._match(TokenType.WHERE) 6037 this = self.expression( 6038 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6039 ) 6040 self._match_r_paren() 6041 6042 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6043 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6044 if self._match_text_seq("WITHIN", "GROUP"): 6045 order = self._parse_wrapped(self._parse_order) 6046 this = self.expression(exp.WithinGroup, this=this, expression=order) 6047 6048 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6049 # Some dialects choose to implement and some do not. 6050 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6051 6052 # There is some code above in _parse_lambda that handles 6053 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6054 6055 # The below changes handle 6056 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
6057 6058 # Oracle allows both formats 6059 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6060 # and Snowflake chose to do the same for familiarity 6061 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6062 if isinstance(this, exp.AggFunc): 6063 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6064 6065 if ignore_respect and ignore_respect is not this: 6066 ignore_respect.replace(ignore_respect.this) 6067 this = self.expression(ignore_respect.__class__, this=this) 6068 6069 this = self._parse_respect_or_ignore_nulls(this) 6070 6071 # bigquery select from window x AS (partition by ...) 6072 if alias: 6073 over = None 6074 self._match(TokenType.ALIAS) 6075 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6076 return this 6077 else: 6078 over = self._prev.text.upper() 6079 6080 if comments and isinstance(func, exp.Expression): 6081 func.pop_comments() 6082 6083 if not self._match(TokenType.L_PAREN): 6084 return self.expression( 6085 exp.Window, 6086 comments=comments, 6087 this=this, 6088 alias=self._parse_id_var(False), 6089 over=over, 6090 ) 6091 6092 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6093 6094 first = self._match(TokenType.FIRST) 6095 if self._match_text_seq("LAST"): 6096 first = False 6097 6098 partition, order = self._parse_partition_and_order() 6099 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6100 6101 if kind: 6102 self._match(TokenType.BETWEEN) 6103 start = self._parse_window_spec() 6104 self._match(TokenType.AND) 6105 end = self._parse_window_spec() 6106 6107 spec = self.expression( 6108 exp.WindowSpec, 6109 kind=kind, 6110 start=start["value"], 6111 start_side=start["side"], 6112 end=end["value"], 6113 end_side=end["side"], 6114 ) 6115 else: 6116 spec = None 6117 6118 self._match_r_paren() 6119 6120 window = self.expression( 6121 exp.Window, 6122 comments=comments, 
6123 this=this, 6124 partition_by=partition, 6125 order=order, 6126 spec=spec, 6127 alias=window_alias, 6128 over=over, 6129 first=first, 6130 ) 6131 6132 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6133 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6134 return self._parse_window(window, alias=alias) 6135 6136 return window 6137 6138 def _parse_partition_and_order( 6139 self, 6140 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6141 return self._parse_partition_by(), self._parse_order() 6142 6143 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6144 self._match(TokenType.BETWEEN) 6145 6146 return { 6147 "value": ( 6148 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6149 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6150 or self._parse_bitwise() 6151 ), 6152 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6153 } 6154 6155 def _parse_alias( 6156 self, this: t.Optional[exp.Expression], explicit: bool = False 6157 ) -> t.Optional[exp.Expression]: 6158 any_token = self._match(TokenType.ALIAS) 6159 comments = self._prev_comments or [] 6160 6161 if explicit and not any_token: 6162 return this 6163 6164 if self._match(TokenType.L_PAREN): 6165 aliases = self.expression( 6166 exp.Aliases, 6167 comments=comments, 6168 this=this, 6169 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6170 ) 6171 self._match_r_paren(aliases) 6172 return aliases 6173 6174 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6175 self.STRING_ALIASES and self._parse_string_as_identifier() 6176 ) 6177 6178 if alias: 6179 comments.extend(alias.pop_comments()) 6180 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6181 column = this.this 6182 6183 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6184 if not this.comments and column and column.comments: 6185 this.comments = 
column.pop_comments() 6186 6187 return this 6188 6189 def _parse_id_var( 6190 self, 6191 any_token: bool = True, 6192 tokens: t.Optional[t.Collection[TokenType]] = None, 6193 ) -> t.Optional[exp.Expression]: 6194 expression = self._parse_identifier() 6195 if not expression and ( 6196 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6197 ): 6198 quoted = self._prev.token_type == TokenType.STRING 6199 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6200 6201 return expression 6202 6203 def _parse_string(self) -> t.Optional[exp.Expression]: 6204 if self._match_set(self.STRING_PARSERS): 6205 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6206 return self._parse_placeholder() 6207 6208 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6209 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6210 6211 def _parse_number(self) -> t.Optional[exp.Expression]: 6212 if self._match_set(self.NUMERIC_PARSERS): 6213 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6214 return self._parse_placeholder() 6215 6216 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6217 if self._match(TokenType.IDENTIFIER): 6218 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6219 return self._parse_placeholder() 6220 6221 def _parse_var( 6222 self, 6223 any_token: bool = False, 6224 tokens: t.Optional[t.Collection[TokenType]] = None, 6225 upper: bool = False, 6226 ) -> t.Optional[exp.Expression]: 6227 if ( 6228 (any_token and self._advance_any()) 6229 or self._match(TokenType.VAR) 6230 or (self._match_set(tokens) if tokens else False) 6231 ): 6232 return self.expression( 6233 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6234 ) 6235 return self._parse_placeholder() 6236 6237 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6238 if self._curr and 
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6239 self._advance() 6240 return self._prev 6241 return None 6242 6243 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6244 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6245 6246 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6247 return self._parse_primary() or self._parse_var(any_token=True) 6248 6249 def _parse_null(self) -> t.Optional[exp.Expression]: 6250 if self._match_set(self.NULL_TOKENS): 6251 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6252 return self._parse_placeholder() 6253 6254 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6255 if self._match(TokenType.TRUE): 6256 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6257 if self._match(TokenType.FALSE): 6258 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6259 return self._parse_placeholder() 6260 6261 def _parse_star(self) -> t.Optional[exp.Expression]: 6262 if self._match(TokenType.STAR): 6263 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6264 return self._parse_placeholder() 6265 6266 def _parse_parameter(self) -> exp.Parameter: 6267 this = self._parse_identifier() or self._parse_primary_or_var() 6268 return self.expression(exp.Parameter, this=this) 6269 6270 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6271 if self._match_set(self.PLACEHOLDER_PARSERS): 6272 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6273 if placeholder: 6274 return placeholder 6275 self._advance(-1) 6276 return None 6277 6278 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6279 if not self._match_texts(keywords): 6280 return None 6281 if self._match(TokenType.L_PAREN, advance=False): 6282 return self._parse_wrapped_csv(self._parse_expression) 6283 6284 expression = self._parse_expression() 6285 return [expression] if expression else None 
    # Parses a `sep`-separated list, attaching trailing comments to the preceding item
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    # Left-associative fold: parses operands with `parse_method`, combining with the
    # node type mapped from each matched separator token
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    # Parses `( ... )`; the parentheses are required unless `optional=True`
    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    # Parses the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS <select>)
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    # BEGIN/START [kind] [TRANSACTION|WORK] [mode ...][, mode ...]
    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens joined by spaces
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    # COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] x] [AND [NO] CHAIN]
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    # Parses ADD [COLUMN] [IF NOT EXISTS] <column-def> [FIRST | AFTER <col>]
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    # Parses DROP [COLUMN ...] as a Drop node, defaulting its kind to COLUMN
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    # Parses the ADD action of ALTER TABLE: constraints, column lists, or schemas
    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    # ALTER TABLE ... ALTER DISTSTYLE {ALL | EVEN | AUTO | KEY DISTKEY <col>}
    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    # ALTER TABLE ... ALTER [COMPOUND] SORTKEY {(cols) | AUTO | NONE}
    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    # Parses the DROP action of ALTER TABLE: partitions or columns
    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    # RENAME [COLUMN old TO new | TO <table>]
    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    # Parses the SET action of ALTER TABLE (properties, location, tags, serde, ...)
    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    # Parses ALTER <alterable> ...; falls back to a raw Command when the statement
    # isn't fully consumed by a registered action parser
    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    # MERGE INTO <target> USING <source> ON <condition> WHEN ... THEN ...
    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    # Parses the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND ...] THEN ... clauses of MERGE
    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET (or neither)
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    # Parses one SET item of the form `<name> = <value>` or `<name> TO <value>`
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    # SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    # Parses SET/UNSET; falls back to a raw Command when tokens remain unconsumed
    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    # Matches a (possibly multi-word) option from `options` and returns it as a Var;
    # raises (or returns None) when no continuation of a known prefix matches
    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    # Consumes the rest of the statement verbatim into a generic Command node
    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    # Parses <kind>([(key value)...]) dictionary properties — presumably ClickHouse
    # dictionary DDL; verify against the registered PROPERTY parsers
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    # Parses ([MIN <v>] MAX <v>); MIN defaults to 0 when omitted
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    # Parses `<expr> FOR <x> IN <iterator> [IF <cond>]`-style comprehensions
    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewind one extra token (the keyword consumed before this call)
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    # Parses $tag$ ... $tag$ (or $$ ... $$) heredoc strings
    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the opening tag sequence repeats
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    # Greedily matches the longest keyword sequence registered in `trie` and returns
    # the corresponding parser; rewinds on failure
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    # Returns True (and advances, unless advance=False) if the current token matches
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    # Matches two consecutive token types
    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    # Case-insensitive text match against a collection of keywords
    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    # Matches a sequence of keyword texts; rewinds fully on a partial match
    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    # Rewrites lambda-parameter column references in `node` into Dot/Identifier
    # (optionally Cast) nodes, using the lambda's declared parameter types
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not
self._match(TokenType.WITH): 7034 return this 7035 7036 op = self._parse_var(any_token=True) 7037 7038 return self.expression(exp.WithOperator, this=this, op=op) 7039 7040 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7041 self._match(TokenType.EQ) 7042 self._match(TokenType.L_PAREN) 7043 7044 opts: t.List[t.Optional[exp.Expression]] = [] 7045 while self._curr and not self._match(TokenType.R_PAREN): 7046 if self._match_text_seq("FORMAT_NAME", "="): 7047 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7048 # so we parse it separately to use _parse_field() 7049 prop = self.expression( 7050 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7051 ) 7052 opts.append(prop) 7053 else: 7054 opts.append(self._parse_property()) 7055 7056 self._match(TokenType.COMMA) 7057 7058 return opts 7059 7060 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7061 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7062 7063 options = [] 7064 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7065 option = self._parse_var(any_token=True) 7066 prev = self._prev.text.upper() 7067 7068 # Different dialects might separate options and values by white space, "=" and "AS" 7069 self._match(TokenType.EQ) 7070 self._match(TokenType.ALIAS) 7071 7072 param = self.expression(exp.CopyParameter, this=option) 7073 7074 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7075 TokenType.L_PAREN, advance=False 7076 ): 7077 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7078 param.set("expressions", self._parse_wrapped_options()) 7079 elif prev == "FILE_FORMAT": 7080 # T-SQL's external file format case 7081 param.set("expression", self._parse_field()) 7082 else: 7083 param.set("expression", self._parse_unquoted_field()) 7084 7085 options.append(param) 7086 self._match(sep) 7087 7088 return options 7089 7090 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 
7091 expr = self.expression(exp.Credentials) 7092 7093 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7094 expr.set("storage", self._parse_field()) 7095 if self._match_text_seq("CREDENTIALS"): 7096 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7097 creds = ( 7098 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7099 ) 7100 expr.set("credentials", creds) 7101 if self._match_text_seq("ENCRYPTION"): 7102 expr.set("encryption", self._parse_wrapped_options()) 7103 if self._match_text_seq("IAM_ROLE"): 7104 expr.set("iam_role", self._parse_field()) 7105 if self._match_text_seq("REGION"): 7106 expr.set("region", self._parse_field()) 7107 7108 return expr 7109 7110 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7111 return self._parse_field() 7112 7113 def _parse_copy(self) -> exp.Copy | exp.Command: 7114 start = self._prev 7115 7116 self._match(TokenType.INTO) 7117 7118 this = ( 7119 self._parse_select(nested=True, parse_subquery_alias=False) 7120 if self._match(TokenType.L_PAREN, advance=False) 7121 else self._parse_table(schema=True) 7122 ) 7123 7124 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7125 7126 files = self._parse_csv(self._parse_file_location) 7127 credentials = self._parse_credentials() 7128 7129 self._match_text_seq("WITH") 7130 7131 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7132 7133 # Fallback case 7134 if self._curr: 7135 return self._parse_as_command(start) 7136 7137 return self.expression( 7138 exp.Copy, 7139 this=this, 7140 kind=kind, 7141 credentials=credentials, 7142 files=files, 7143 params=params, 7144 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, otherwise a VarMap of key/value pairs."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    idx = 0
    # Arguments alternate key, value, key, value, ...
    while idx < len(args):
        keys.append(args[idx])
        values.append(args[idx + 1])
        idx += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Create a parser callable for a binary range operator (LIKE, GLOB, ...)."""

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()

        # Some operators store their operands in reversed order
        if reverse_args:
            this, expression = expression, this

        node = self.expression(expr_type, this=this, expression=expression)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG/LN node from *args*, honoring the dialect's argument order."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # Single-argument LOG: some dialects interpret it as the natural logarithm
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)

    # Default argument order is base, expression; swap when the dialect differs
    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this

    return exp.Log(this=this, expression=expression)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs *expr_type* with a dialect-converted JSON path."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        path = dialect.to_json_path(seq_get(args, 1))
        expression = expr_type(this=seq_get(args, 0), expression=path)

        # Only JSON_EXTRACT supports trailing variadic path arguments
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod node, parenthesizing binary operands to preserve precedence."""

    def _wrap(operand: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # e.g. MOD(a + 1, 7) -> (a + 1) % 7
        return exp.Paren(this=operand) if isinstance(operand, exp.Binary) else operand

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array/list constructor node from parsed arguments."""
    array_exp = exp_class(expressions=args)

    # Dialects that distinguish ARRAY[...] from ARRAY(...) must record which form was used
    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE, filling in a default source timezone for the 2-arg form."""
    if len(args) == 2:
        # 2-arg form is (target_tz, timestamp); the source tz falls back to the default
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
154class Parser(metaclass=_Parser): 155 """ 156 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 157 158 Args: 159 error_level: The desired error level. 160 Default: ErrorLevel.IMMEDIATE 161 error_message_context: The amount of context to capture from a query string when displaying 162 the error message (in number of characters). 163 Default: 100 164 max_errors: Maximum number of error messages to include in a raised ParseError. 165 This is only relevant if error_level is ErrorLevel.RAISE. 166 Default: 3 167 """ 168 169 FUNCTIONS: t.Dict[str, t.Callable] = { 170 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 171 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 172 "CONCAT": lambda args, dialect: exp.Concat( 173 expressions=args, 174 safe=not dialect.STRICT_STRING_CONCAT, 175 coalesce=dialect.CONCAT_COALESCE, 176 ), 177 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 178 expressions=args, 179 safe=not dialect.STRICT_STRING_CONCAT, 180 coalesce=dialect.CONCAT_COALESCE, 181 ), 182 "CONVERT_TIMEZONE": build_convert_timezone, 183 "DATE_TO_DATE_STR": lambda args: exp.Cast( 184 this=seq_get(args, 0), 185 to=exp.DataType(this=exp.DataType.Type.TEXT), 186 ), 187 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 188 start=seq_get(args, 0), 189 end=seq_get(args, 1), 190 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 191 ), 192 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 193 "HEX": build_hex, 194 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 195 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 196 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 197 "LIKE": build_like, 198 "LOG": build_logarithm, 199 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 200 "LOG10": lambda args: 
exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 201 "LOWER": build_lower, 202 "LPAD": lambda args: build_pad(args), 203 "LEFTPAD": lambda args: build_pad(args), 204 "MOD": build_mod, 205 "RPAD": lambda args: build_pad(args, is_left=False), 206 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 207 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 208 if len(args) != 2 209 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 210 "TIME_TO_TIME_STR": lambda args: exp.Cast( 211 this=seq_get(args, 0), 212 to=exp.DataType(this=exp.DataType.Type.TEXT), 213 ), 214 "TO_HEX": build_hex, 215 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 216 this=exp.Cast( 217 this=seq_get(args, 0), 218 to=exp.DataType(this=exp.DataType.Type.TEXT), 219 ), 220 start=exp.Literal.number(1), 221 length=exp.Literal.number(10), 222 ), 223 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 224 "UPPER": build_upper, 225 "VAR_MAP": build_var_map, 226 "COALESCE": lambda args: exp.Coalesce(this=seq_get(args, 0), expressions=args[1:]), 227 } 228 229 NO_PAREN_FUNCTIONS = { 230 TokenType.CURRENT_DATE: exp.CurrentDate, 231 TokenType.CURRENT_DATETIME: exp.CurrentDate, 232 TokenType.CURRENT_TIME: exp.CurrentTime, 233 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 234 TokenType.CURRENT_USER: exp.CurrentUser, 235 } 236 237 STRUCT_TYPE_TOKENS = { 238 TokenType.NESTED, 239 TokenType.OBJECT, 240 TokenType.STRUCT, 241 } 242 243 NESTED_TYPE_TOKENS = { 244 TokenType.ARRAY, 245 TokenType.LIST, 246 TokenType.LOWCARDINALITY, 247 TokenType.MAP, 248 TokenType.NULLABLE, 249 *STRUCT_TYPE_TOKENS, 250 } 251 252 ENUM_TYPE_TOKENS = { 253 TokenType.ENUM, 254 TokenType.ENUM8, 255 TokenType.ENUM16, 256 } 257 258 AGGREGATE_TYPE_TOKENS = { 259 TokenType.AGGREGATEFUNCTION, 260 TokenType.SIMPLEAGGREGATEFUNCTION, 261 } 262 263 TYPE_TOKENS = { 264 TokenType.BIT, 265 TokenType.BOOLEAN, 266 TokenType.TINYINT, 267 
TokenType.UTINYINT, 268 TokenType.SMALLINT, 269 TokenType.USMALLINT, 270 TokenType.INT, 271 TokenType.UINT, 272 TokenType.BIGINT, 273 TokenType.UBIGINT, 274 TokenType.INT128, 275 TokenType.UINT128, 276 TokenType.INT256, 277 TokenType.UINT256, 278 TokenType.MEDIUMINT, 279 TokenType.UMEDIUMINT, 280 TokenType.FIXEDSTRING, 281 TokenType.FLOAT, 282 TokenType.DOUBLE, 283 TokenType.CHAR, 284 TokenType.NCHAR, 285 TokenType.VARCHAR, 286 TokenType.NVARCHAR, 287 TokenType.BPCHAR, 288 TokenType.TEXT, 289 TokenType.MEDIUMTEXT, 290 TokenType.LONGTEXT, 291 TokenType.MEDIUMBLOB, 292 TokenType.LONGBLOB, 293 TokenType.BINARY, 294 TokenType.VARBINARY, 295 TokenType.JSON, 296 TokenType.JSONB, 297 TokenType.INTERVAL, 298 TokenType.TINYBLOB, 299 TokenType.TINYTEXT, 300 TokenType.TIME, 301 TokenType.TIMETZ, 302 TokenType.TIMESTAMP, 303 TokenType.TIMESTAMP_S, 304 TokenType.TIMESTAMP_MS, 305 TokenType.TIMESTAMP_NS, 306 TokenType.TIMESTAMPTZ, 307 TokenType.TIMESTAMPLTZ, 308 TokenType.TIMESTAMPNTZ, 309 TokenType.DATETIME, 310 TokenType.DATETIME64, 311 TokenType.DATE, 312 TokenType.DATE32, 313 TokenType.INT4RANGE, 314 TokenType.INT4MULTIRANGE, 315 TokenType.INT8RANGE, 316 TokenType.INT8MULTIRANGE, 317 TokenType.NUMRANGE, 318 TokenType.NUMMULTIRANGE, 319 TokenType.TSRANGE, 320 TokenType.TSMULTIRANGE, 321 TokenType.TSTZRANGE, 322 TokenType.TSTZMULTIRANGE, 323 TokenType.DATERANGE, 324 TokenType.DATEMULTIRANGE, 325 TokenType.DECIMAL, 326 TokenType.UDECIMAL, 327 TokenType.BIGDECIMAL, 328 TokenType.UUID, 329 TokenType.GEOGRAPHY, 330 TokenType.GEOMETRY, 331 TokenType.HLLSKETCH, 332 TokenType.HSTORE, 333 TokenType.PSEUDO_TYPE, 334 TokenType.SUPER, 335 TokenType.SERIAL, 336 TokenType.SMALLSERIAL, 337 TokenType.BIGSERIAL, 338 TokenType.XML, 339 TokenType.YEAR, 340 TokenType.UNIQUEIDENTIFIER, 341 TokenType.USERDEFINED, 342 TokenType.MONEY, 343 TokenType.SMALLMONEY, 344 TokenType.ROWVERSION, 345 TokenType.IMAGE, 346 TokenType.VARIANT, 347 TokenType.VECTOR, 348 TokenType.OBJECT, 349 
TokenType.OBJECT_IDENTIFIER, 350 TokenType.INET, 351 TokenType.IPADDRESS, 352 TokenType.IPPREFIX, 353 TokenType.IPV4, 354 TokenType.IPV6, 355 TokenType.UNKNOWN, 356 TokenType.NULL, 357 TokenType.NAME, 358 TokenType.TDIGEST, 359 *ENUM_TYPE_TOKENS, 360 *NESTED_TYPE_TOKENS, 361 *AGGREGATE_TYPE_TOKENS, 362 } 363 364 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 365 TokenType.BIGINT: TokenType.UBIGINT, 366 TokenType.INT: TokenType.UINT, 367 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 368 TokenType.SMALLINT: TokenType.USMALLINT, 369 TokenType.TINYINT: TokenType.UTINYINT, 370 TokenType.DECIMAL: TokenType.UDECIMAL, 371 } 372 373 SUBQUERY_PREDICATES = { 374 TokenType.ANY: exp.Any, 375 TokenType.ALL: exp.All, 376 TokenType.EXISTS: exp.Exists, 377 TokenType.SOME: exp.Any, 378 } 379 380 RESERVED_TOKENS = { 381 *Tokenizer.SINGLE_TOKENS.values(), 382 TokenType.SELECT, 383 } - {TokenType.IDENTIFIER} 384 385 DB_CREATABLES = { 386 TokenType.DATABASE, 387 TokenType.DICTIONARY, 388 TokenType.MODEL, 389 TokenType.SCHEMA, 390 TokenType.SEQUENCE, 391 TokenType.STORAGE_INTEGRATION, 392 TokenType.TABLE, 393 TokenType.TAG, 394 TokenType.VIEW, 395 TokenType.WAREHOUSE, 396 TokenType.STREAMLIT, 397 } 398 399 CREATABLES = { 400 TokenType.COLUMN, 401 TokenType.CONSTRAINT, 402 TokenType.FOREIGN_KEY, 403 TokenType.FUNCTION, 404 TokenType.INDEX, 405 TokenType.PROCEDURE, 406 *DB_CREATABLES, 407 } 408 409 ALTERABLES = { 410 TokenType.TABLE, 411 TokenType.VIEW, 412 } 413 414 # Tokens that can represent identifiers 415 ID_VAR_TOKENS = { 416 TokenType.ALL, 417 TokenType.VAR, 418 TokenType.ANTI, 419 TokenType.APPLY, 420 TokenType.ASC, 421 TokenType.ASOF, 422 TokenType.AUTO_INCREMENT, 423 TokenType.BEGIN, 424 TokenType.BPCHAR, 425 TokenType.CACHE, 426 TokenType.CASE, 427 TokenType.COLLATE, 428 TokenType.COMMAND, 429 TokenType.COMMENT, 430 TokenType.COMMIT, 431 TokenType.CONSTRAINT, 432 TokenType.COPY, 433 TokenType.CUBE, 434 TokenType.DEFAULT, 435 TokenType.DELETE, 436 TokenType.DESC, 437 TokenType.DESCRIBE, 438 
TokenType.DICTIONARY, 439 TokenType.DIV, 440 TokenType.END, 441 TokenType.EXECUTE, 442 TokenType.ESCAPE, 443 TokenType.FALSE, 444 TokenType.FIRST, 445 TokenType.FILTER, 446 TokenType.FINAL, 447 TokenType.FORMAT, 448 TokenType.FULL, 449 TokenType.IDENTIFIER, 450 TokenType.IS, 451 TokenType.ISNULL, 452 TokenType.INTERVAL, 453 TokenType.KEEP, 454 TokenType.KILL, 455 TokenType.LEFT, 456 TokenType.LOAD, 457 TokenType.MERGE, 458 TokenType.NATURAL, 459 TokenType.NEXT, 460 TokenType.OFFSET, 461 TokenType.OPERATOR, 462 TokenType.ORDINALITY, 463 TokenType.OVERLAPS, 464 TokenType.OVERWRITE, 465 TokenType.PARTITION, 466 TokenType.PERCENT, 467 TokenType.PIVOT, 468 TokenType.PRAGMA, 469 TokenType.RANGE, 470 TokenType.RECURSIVE, 471 TokenType.REFERENCES, 472 TokenType.REFRESH, 473 TokenType.RENAME, 474 TokenType.REPLACE, 475 TokenType.RIGHT, 476 TokenType.ROLLUP, 477 TokenType.ROW, 478 TokenType.ROWS, 479 TokenType.SEMI, 480 TokenType.SET, 481 TokenType.SETTINGS, 482 TokenType.SHOW, 483 TokenType.TEMPORARY, 484 TokenType.TOP, 485 TokenType.TRUE, 486 TokenType.TRUNCATE, 487 TokenType.UNIQUE, 488 TokenType.UNNEST, 489 TokenType.UNPIVOT, 490 TokenType.UPDATE, 491 TokenType.USE, 492 TokenType.VOLATILE, 493 TokenType.WINDOW, 494 *CREATABLES, 495 *SUBQUERY_PREDICATES, 496 *TYPE_TOKENS, 497 *NO_PAREN_FUNCTIONS, 498 } 499 500 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 501 502 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 503 TokenType.ANTI, 504 TokenType.APPLY, 505 TokenType.ASOF, 506 TokenType.FULL, 507 TokenType.LEFT, 508 TokenType.LOCK, 509 TokenType.NATURAL, 510 TokenType.OFFSET, 511 TokenType.RIGHT, 512 TokenType.SEMI, 513 TokenType.WINDOW, 514 } 515 516 ALIAS_TOKENS = ID_VAR_TOKENS 517 518 ARRAY_CONSTRUCTORS = { 519 "ARRAY": exp.Array, 520 "LIST": exp.List, 521 } 522 523 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 524 525 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 526 527 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 528 529 FUNC_TOKENS = { 530 
TokenType.COLLATE, 531 TokenType.COMMAND, 532 TokenType.CURRENT_DATE, 533 TokenType.CURRENT_DATETIME, 534 TokenType.CURRENT_TIMESTAMP, 535 TokenType.CURRENT_TIME, 536 TokenType.CURRENT_USER, 537 TokenType.FILTER, 538 TokenType.FIRST, 539 TokenType.FORMAT, 540 TokenType.GLOB, 541 TokenType.IDENTIFIER, 542 TokenType.INDEX, 543 TokenType.ISNULL, 544 TokenType.ILIKE, 545 TokenType.INSERT, 546 TokenType.LIKE, 547 TokenType.MERGE, 548 TokenType.OFFSET, 549 TokenType.PRIMARY_KEY, 550 TokenType.RANGE, 551 TokenType.REPLACE, 552 TokenType.RLIKE, 553 TokenType.ROW, 554 TokenType.UNNEST, 555 TokenType.VAR, 556 TokenType.LEFT, 557 TokenType.RIGHT, 558 TokenType.SEQUENCE, 559 TokenType.DATE, 560 TokenType.DATETIME, 561 TokenType.TABLE, 562 TokenType.TIMESTAMP, 563 TokenType.TIMESTAMPTZ, 564 TokenType.TRUNCATE, 565 TokenType.WINDOW, 566 TokenType.XOR, 567 *TYPE_TOKENS, 568 *SUBQUERY_PREDICATES, 569 } 570 571 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 572 TokenType.AND: exp.And, 573 } 574 575 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 576 TokenType.COLON_EQ: exp.PropertyEQ, 577 } 578 579 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 580 TokenType.OR: exp.Or, 581 } 582 583 EQUALITY = { 584 TokenType.EQ: exp.EQ, 585 TokenType.NEQ: exp.NEQ, 586 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 587 } 588 589 COMPARISON = { 590 TokenType.GT: exp.GT, 591 TokenType.GTE: exp.GTE, 592 TokenType.LT: exp.LT, 593 TokenType.LTE: exp.LTE, 594 } 595 596 BITWISE = { 597 TokenType.AMP: exp.BitwiseAnd, 598 TokenType.CARET: exp.BitwiseXor, 599 TokenType.PIPE: exp.BitwiseOr, 600 } 601 602 TERM = { 603 TokenType.DASH: exp.Sub, 604 TokenType.PLUS: exp.Add, 605 TokenType.MOD: exp.Mod, 606 TokenType.COLLATE: exp.Collate, 607 } 608 609 FACTOR = { 610 TokenType.DIV: exp.IntDiv, 611 TokenType.LR_ARROW: exp.Distance, 612 TokenType.SLASH: exp.Div, 613 TokenType.STAR: exp.Mul, 614 } 615 616 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 617 618 TIMES = { 619 
TokenType.TIME, 620 TokenType.TIMETZ, 621 } 622 623 TIMESTAMPS = { 624 TokenType.TIMESTAMP, 625 TokenType.TIMESTAMPTZ, 626 TokenType.TIMESTAMPLTZ, 627 *TIMES, 628 } 629 630 SET_OPERATIONS = { 631 TokenType.UNION, 632 TokenType.INTERSECT, 633 TokenType.EXCEPT, 634 } 635 636 JOIN_METHODS = { 637 TokenType.ASOF, 638 TokenType.NATURAL, 639 TokenType.POSITIONAL, 640 } 641 642 JOIN_SIDES = { 643 TokenType.LEFT, 644 TokenType.RIGHT, 645 TokenType.FULL, 646 } 647 648 JOIN_KINDS = { 649 TokenType.ANTI, 650 TokenType.CROSS, 651 TokenType.INNER, 652 TokenType.OUTER, 653 TokenType.SEMI, 654 TokenType.STRAIGHT_JOIN, 655 } 656 657 JOIN_HINTS: t.Set[str] = set() 658 659 LAMBDAS = { 660 TokenType.ARROW: lambda self, expressions: self.expression( 661 exp.Lambda, 662 this=self._replace_lambda( 663 self._parse_assignment(), 664 expressions, 665 ), 666 expressions=expressions, 667 ), 668 TokenType.FARROW: lambda self, expressions: self.expression( 669 exp.Kwarg, 670 this=exp.var(expressions[0].name), 671 expression=self._parse_assignment(), 672 ), 673 } 674 675 COLUMN_OPERATORS = { 676 TokenType.DOT: None, 677 TokenType.DCOLON: lambda self, this, to: self.expression( 678 exp.Cast if self.STRICT_CAST else exp.TryCast, 679 this=this, 680 to=to, 681 ), 682 TokenType.ARROW: lambda self, this, path: self.expression( 683 exp.JSONExtract, 684 this=this, 685 expression=self.dialect.to_json_path(path), 686 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 687 ), 688 TokenType.DARROW: lambda self, this, path: self.expression( 689 exp.JSONExtractScalar, 690 this=this, 691 expression=self.dialect.to_json_path(path), 692 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 693 ), 694 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 695 exp.JSONBExtract, 696 this=this, 697 expression=path, 698 ), 699 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 700 exp.JSONBExtractScalar, 701 this=this, 702 expression=path, 703 ), 704 TokenType.PLACEHOLDER: lambda self, this, 
key: self.expression( 705 exp.JSONBContains, 706 this=this, 707 expression=key, 708 ), 709 } 710 711 EXPRESSION_PARSERS = { 712 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 713 exp.Column: lambda self: self._parse_column(), 714 exp.Condition: lambda self: self._parse_assignment(), 715 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 716 exp.Expression: lambda self: self._parse_expression(), 717 exp.From: lambda self: self._parse_from(joins=True), 718 exp.Group: lambda self: self._parse_group(), 719 exp.Having: lambda self: self._parse_having(), 720 exp.Identifier: lambda self: self._parse_id_var(), 721 exp.Join: lambda self: self._parse_join(), 722 exp.Lambda: lambda self: self._parse_lambda(), 723 exp.Lateral: lambda self: self._parse_lateral(), 724 exp.Limit: lambda self: self._parse_limit(), 725 exp.Offset: lambda self: self._parse_offset(), 726 exp.Order: lambda self: self._parse_order(), 727 exp.Ordered: lambda self: self._parse_ordered(), 728 exp.Properties: lambda self: self._parse_properties(), 729 exp.Qualify: lambda self: self._parse_qualify(), 730 exp.Returning: lambda self: self._parse_returning(), 731 exp.Select: lambda self: self._parse_select(), 732 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 733 exp.Table: lambda self: self._parse_table_parts(), 734 exp.TableAlias: lambda self: self._parse_table_alias(), 735 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 736 exp.Where: lambda self: self._parse_where(), 737 exp.Window: lambda self: self._parse_named_window(), 738 exp.With: lambda self: self._parse_with(), 739 "JOIN_TYPE": lambda self: self._parse_join_parts(), 740 } 741 742 STATEMENT_PARSERS = { 743 TokenType.ALTER: lambda self: self._parse_alter(), 744 TokenType.BEGIN: lambda self: self._parse_transaction(), 745 TokenType.CACHE: lambda self: self._parse_cache(), 746 TokenType.COMMENT: lambda self: self._parse_comment(), 747 
TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 748 TokenType.COPY: lambda self: self._parse_copy(), 749 TokenType.CREATE: lambda self: self._parse_create(), 750 TokenType.DELETE: lambda self: self._parse_delete(), 751 TokenType.DESC: lambda self: self._parse_describe(), 752 TokenType.DESCRIBE: lambda self: self._parse_describe(), 753 TokenType.DROP: lambda self: self._parse_drop(), 754 TokenType.INSERT: lambda self: self._parse_insert(), 755 TokenType.KILL: lambda self: self._parse_kill(), 756 TokenType.LOAD: lambda self: self._parse_load(), 757 TokenType.MERGE: lambda self: self._parse_merge(), 758 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 759 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 760 TokenType.REFRESH: lambda self: self._parse_refresh(), 761 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 762 TokenType.SET: lambda self: self._parse_set(), 763 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 764 TokenType.UNCACHE: lambda self: self._parse_uncache(), 765 TokenType.UPDATE: lambda self: self._parse_update(), 766 TokenType.USE: lambda self: self.expression( 767 exp.Use, 768 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 769 this=self._parse_table(schema=False), 770 ), 771 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 772 } 773 774 UNARY_PARSERS = { 775 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 776 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 777 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 778 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 779 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 780 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 781 } 782 783 STRING_PARSERS 
= {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            # An optional UESCAPE clause overrides the default Unicode escape character
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Token -> builder for numeric-ish literal tokens (bit/byte/hex strings and numbers)
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # Token -> builder for "primary" expressions: literals, NULL / TRUE / FALSE, the star
    # projection (with EXCEPT / REPLACE / RENAME modifiers), session parameters, etc.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    # Token -> builder for placeholder/bind-parameter tokens (e.g. ?, @param, :name)
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Token -> parser for range/predicate operators (BETWEEN, IN, LIKE, IS, ...); the
    # `this` argument is the already-parsed left-hand side of the operator
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Keyword -> parser for DDL properties (the clauses that can follow CREATE, etc.)
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> parser for column/table constraints in DDL column definitions
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> becomes OnUpdateColumnConstraint; a bare ON <id> becomes OnProperty
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
"TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1018 "UNIQUE": lambda self: self._parse_unique(), 1019 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1020 "WITH": lambda self: self.expression( 1021 exp.Properties, expressions=self._parse_wrapped_properties() 1022 ), 1023 } 1024 1025 ALTER_PARSERS = { 1026 "ADD": lambda self: self._parse_alter_table_add(), 1027 "ALTER": lambda self: self._parse_alter_table_alter(), 1028 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1029 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1030 "DROP": lambda self: self._parse_alter_table_drop(), 1031 "RENAME": lambda self: self._parse_alter_table_rename(), 1032 "SET": lambda self: self._parse_alter_table_set(), 1033 "AS": lambda self: self._parse_select(), 1034 } 1035 1036 ALTER_ALTER_PARSERS = { 1037 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1038 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1039 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1040 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1041 } 1042 1043 SCHEMA_UNNAMED_CONSTRAINTS = { 1044 "CHECK", 1045 "EXCLUDE", 1046 "FOREIGN KEY", 1047 "LIKE", 1048 "PERIOD", 1049 "PRIMARY KEY", 1050 "UNIQUE", 1051 } 1052 1053 NO_PAREN_FUNCTION_PARSERS = { 1054 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1055 "CASE": lambda self: self._parse_case(), 1056 "CONNECT_BY_ROOT": lambda self: self.expression( 1057 exp.ConnectByRoot, this=self._parse_column() 1058 ), 1059 "IF": lambda self: self._parse_if(), 1060 "NEXT": lambda self: self._parse_next_value_for(), 1061 } 1062 1063 INVALID_FUNC_NAME_TOKENS = { 1064 TokenType.IDENTIFIER, 1065 TokenType.STRING, 1066 } 1067 1068 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1069 1070 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1071 1072 FUNCTION_PARSERS = { 1073 "CAST": lambda self: 
self._parse_cast(self.STRICT_CAST), 1074 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1075 "DECODE": lambda self: self._parse_decode(), 1076 "EXTRACT": lambda self: self._parse_extract(), 1077 "GAP_FILL": lambda self: self._parse_gap_fill(), 1078 "JSON_OBJECT": lambda self: self._parse_json_object(), 1079 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1080 "JSON_TABLE": lambda self: self._parse_json_table(), 1081 "MATCH": lambda self: self._parse_match_against(), 1082 "OPENJSON": lambda self: self._parse_open_json(), 1083 "POSITION": lambda self: self._parse_position(), 1084 "PREDICT": lambda self: self._parse_predict(), 1085 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1086 "STRING_AGG": lambda self: self._parse_string_agg(), 1087 "SUBSTRING": lambda self: self._parse_substring(), 1088 "TRIM": lambda self: self._parse_trim(), 1089 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1090 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1091 } 1092 1093 QUERY_MODIFIER_PARSERS = { 1094 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1095 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1096 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1097 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1098 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1099 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1100 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1101 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1102 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1103 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1104 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1105 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1106 TokenType.LOCK: lambda self: ("locks", 
self._parse_locks()), 1107 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1108 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1109 TokenType.CLUSTER_BY: lambda self: ( 1110 "cluster", 1111 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1112 ), 1113 TokenType.DISTRIBUTE_BY: lambda self: ( 1114 "distribute", 1115 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1116 ), 1117 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1118 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1119 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1120 } 1121 1122 SET_PARSERS = { 1123 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1124 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1125 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1126 "TRANSACTION": lambda self: self._parse_set_transaction(), 1127 } 1128 1129 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1130 1131 TYPE_LITERAL_PARSERS = { 1132 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1133 } 1134 1135 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1136 1137 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1138 1139 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1140 1141 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1142 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1143 "ISOLATION": ( 1144 ("LEVEL", "REPEATABLE", "READ"), 1145 ("LEVEL", "READ", "COMMITTED"), 1146 ("LEVEL", "READ", "UNCOMITTED"), 1147 ("LEVEL", "SERIALIZABLE"), 1148 ), 1149 "READ": ("WRITE", "ONLY"), 1150 } 1151 1152 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1153 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1154 ) 1155 
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    # Options accepted by CREATE SEQUENCE
    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    # Objects that can follow the USE statement
    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    # Trailing options allowed on key constraints (e.g. NOT ENFORCED, MATCH FULL)
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    # Whether a bare CAST should error (rather than return NULL) on failure
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Local import to avoid a circular dependency between parser and dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        # Clears all per-parse state so the instance can be reused across statements
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        # Unbound method is passed so _parse can re-invoke it per statement chunk
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the type we were trying, then move to the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into per-statement chunks at semicolons, then runs
        # parse_method over each chunk, producing one tree per statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon carrying comments becomes its own chunk so the comments survive
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # \033[4m ... \033[0m underlines the offending SQL span in terminals
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, attach any pending comments from the token stream
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves buffered token comments onto the expression and clears the buffer
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the raw SQL text spanned by the two tokens (inclusive)
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent (no whitespace between them)
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev token views
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute token index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap unsupported syntax as an opaque Command node
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure surfaces as a ParseError we can catch here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT ON <kind> <target> IS <string>; falls back to a Command
        # node when the target kind is not a recognized creatable.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action
            # (DELETE / RECOMPRESS / TO DISK / TO VOLUME)
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the statement keyword,
        # fall back to dialect commands, then to a plain expression/select.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            # Unknown object kind: preserve the statement verbatim as a Command
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
1677 ) 1678 1679 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1680 1681 if self._match(TokenType.L_PAREN, advance=False): 1682 expressions = self._parse_wrapped_csv(self._parse_types) 1683 else: 1684 expressions = None 1685 1686 return self.expression( 1687 exp.Drop, 1688 comments=start.comments, 1689 exists=if_exists, 1690 this=table, 1691 expressions=expressions, 1692 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1693 temporary=temporary, 1694 materialized=materialized, 1695 cascade=self._match_text_seq("CASCADE"), 1696 constraints=self._match_text_seq("CONSTRAINTS"), 1697 purge=self._match_text_seq("PURGE"), 1698 cluster=cluster, 1699 ) 1700 1701 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1702 return ( 1703 self._match_text_seq("IF") 1704 and (not not_ or self._match(TokenType.NOT)) 1705 and self._match(TokenType.EXISTS) 1706 ) 1707 1708 def _parse_create(self) -> exp.Create | exp.Command: 1709 # Note: this can't be None because we've matched a statement parser 1710 start = self._prev 1711 comments = self._prev_comments 1712 1713 replace = ( 1714 start.token_type == TokenType.REPLACE 1715 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1716 or self._match_pair(TokenType.OR, TokenType.ALTER) 1717 ) 1718 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1719 1720 unique = self._match(TokenType.UNIQUE) 1721 1722 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1723 clustered = True 1724 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1725 "COLUMNSTORE" 1726 ): 1727 clustered = False 1728 else: 1729 clustered = None 1730 1731 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1732 self._advance() 1733 1734 properties = None 1735 create_token = self._match_set(self.CREATABLES) and self._prev 1736 1737 if not create_token: 1738 # exp.Properties.Location.POST_CREATE 1739 properties = self._parse_properties() 1740 create_token = 
self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                # Neither properties nor a creatable kind: keep as raw command.
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merge newly parsed properties into the running Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Unconsumed trailing tokens mean we failed to fully parse: keep raw.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options; returns None if nothing was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # If no tokens were consumed at all, report "no sequence properties".
        return None if self._index == index else seq

    def
_parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only truthy modifiers are forwarded; parsers that don't accept
                # a given keyword raise TypeError, reported as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # Parses a parenthesized, comma-separated property list.
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse one table/DDL property, trying keyword parsers before key = value."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key = value property; rewind and try sequence options.
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS <format> (Hive), incl. INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers in property positions become exp.Var nodes.
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Consume an optional "=" or AS, then wrap the value in exp_class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into one exp.Properties, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguate VOLATILE: after a creatable keyword it is a table
        # property, otherwise a function stability attribute.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON/OFF [(...options...)]."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON/OFF [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)]."""
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly given.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH in a DDL statement, trying each known form."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        # DEFINER = user@host; the "@" is tokenized as PARAMETER.
        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY expression list, optionally parenthesized.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive's CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Only COPY GRANTS is a property; otherwise rewind the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # With "=" an explicit ratio follows; otherwise NO/DEFAULT modifiers apply.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # [NO] [CONCURRENT] ISOLATED LOADING; rewind fully if the required
        # ISOLATED LOADING keywords are absent.
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named target.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS..., REMAINDER...)."""
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not column references.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION OF parent {DEFAULT | FOR VALUES <bound spec>}; rewind if
        # OF does not follow.
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) ->
exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option>]... in CREATE TABLE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a UDF RETURNS clause: TABLE<...>, TABLE(...), NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [EXTENDED|FORMATTED|HISTORY] <table> ..."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" word was actually the first part of a dotted table
            # name; undo both matches.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse INSERT [OVERWRITE|IGNORE] [INTO] target ... [RETURNING ...]."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY 'path' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        # KILL [CONNECTION | QUERY] <id>
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres) or ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        # [WITH] SERDEPROPERTIES (...); rewind if the keyword is absent.
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive's ROW FORMAT {SERDE ... | DELIMITED ...} clause."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's LOAD DATA [LOCAL] INPATH ... INTO TABLE ... statement."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET ... [FROM ...] [WHERE ...] [RETURNING ...]."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where":
self._parse_where(), 2722 "returning": returning or self._parse_returning(), 2723 "order": self._parse_order(), 2724 "limit": self._parse_limit(), 2725 }, 2726 ) 2727 2728 def _parse_uncache(self) -> exp.Uncache: 2729 if not self._match(TokenType.TABLE): 2730 self.raise_error("Expecting TABLE after UNCACHE") 2731 2732 return self.expression( 2733 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2734 ) 2735 2736 def _parse_cache(self) -> exp.Cache: 2737 lazy = self._match_text_seq("LAZY") 2738 self._match(TokenType.TABLE) 2739 table = self._parse_table(schema=True) 2740 2741 options = [] 2742 if self._match_text_seq("OPTIONS"): 2743 self._match_l_paren() 2744 k = self._parse_string() 2745 self._match(TokenType.EQ) 2746 v = self._parse_string() 2747 options = [k, v] 2748 self._match_r_paren() 2749 2750 self._match(TokenType.ALIAS) 2751 return self.expression( 2752 exp.Cache, 2753 this=table, 2754 lazy=lazy, 2755 options=options, 2756 expression=self._parse_select(nested=True), 2757 ) 2758 2759 def _parse_partition(self) -> t.Optional[exp.Partition]: 2760 if not self._match(TokenType.PARTITION): 2761 return None 2762 2763 return self.expression( 2764 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2765 ) 2766 2767 def _parse_value(self) -> t.Optional[exp.Tuple]: 2768 if self._match(TokenType.L_PAREN): 2769 expressions = self._parse_csv(self._parse_expression) 2770 self._match_r_paren() 2771 return self.expression(exp.Tuple, expressions=expressions) 2772 2773 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        # Hook point: dialects can override how SELECT projections are parsed.
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, SELECT, parenthesized subqueries,
        VALUES, leading FROM (duckdb), SUMMARIZE, DESCRIBE or STREAM."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may be a no-op under a lenient ErrorLevel, hence the return.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means ALL/DISTINCT is actually a qualified column name.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a ``WITH [RECURSIVE] <cte> [, ...]`` clause."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; some inputs repeat WITH instead.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: ``alias [(cols)] AS [NOT MATERIALIZED] (<statement>)``."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse ``[AS] alias [(col, ...)]``; returns None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # No columns means the paren belonged to something else: back out.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, attaching trailing pivots and an optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined tables that reference a prior FROM alias into UNNEST calls
        (e.g. BigQuery's implicit-unnest syntax)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, LIMIT, ...) to a query."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
offset = expression.args.pop("offset", None) 3024 3025 if offset: 3026 offset = exp.Offset(expression=offset) 3027 this.set("offset", offset) 3028 3029 limit_by_expressions = expression.expressions 3030 expression.set("expressions", None) 3031 offset.set("expressions", limit_by_expressions) 3032 continue 3033 break 3034 3035 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3036 this = self._implicit_unnests_to_explicit(this) 3037 3038 return this 3039 3040 def _parse_hint(self) -> t.Optional[exp.Hint]: 3041 if self._match(TokenType.HINT): 3042 hints = [] 3043 for hint in iter( 3044 lambda: self._parse_csv( 3045 lambda: self._parse_function() or self._parse_var(upper=True) 3046 ), 3047 [], 3048 ): 3049 hints.extend(hint) 3050 3051 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3052 self.raise_error("Expected */ after HINT") 3053 3054 return self.expression(exp.Hint, expressions=hints) 3055 3056 return None 3057 3058 def _parse_into(self) -> t.Optional[exp.Into]: 3059 if not self._match(TokenType.INTO): 3060 return None 3061 3062 temp = self._match(TokenType.TEMPORARY) 3063 unlogged = self._match_text_seq("UNLOGGED") 3064 self._match(TokenType.TABLE) 3065 3066 return self.expression( 3067 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3068 ) 3069 3070 def _parse_from( 3071 self, joins: bool = False, skip_from_token: bool = False 3072 ) -> t.Optional[exp.From]: 3073 if not skip_from_token and not self._match(TokenType.FROM): 3074 return None 3075 3076 return self.expression( 3077 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3078 ) 3079 3080 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3081 return self.expression( 3082 exp.MatchRecognizeMeasure, 3083 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3084 this=self._parse_expression(), 3085 ) 3086 3087 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 
3088 if not self._match(TokenType.MATCH_RECOGNIZE): 3089 return None 3090 3091 self._match_l_paren() 3092 3093 partition = self._parse_partition_by() 3094 order = self._parse_order() 3095 3096 measures = ( 3097 self._parse_csv(self._parse_match_recognize_measure) 3098 if self._match_text_seq("MEASURES") 3099 else None 3100 ) 3101 3102 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3103 rows = exp.var("ONE ROW PER MATCH") 3104 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3105 text = "ALL ROWS PER MATCH" 3106 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3107 text += " SHOW EMPTY MATCHES" 3108 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3109 text += " OMIT EMPTY MATCHES" 3110 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3111 text += " WITH UNMATCHED ROWS" 3112 rows = exp.var(text) 3113 else: 3114 rows = None 3115 3116 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3117 text = "AFTER MATCH SKIP" 3118 if self._match_text_seq("PAST", "LAST", "ROW"): 3119 text += " PAST LAST ROW" 3120 elif self._match_text_seq("TO", "NEXT", "ROW"): 3121 text += " TO NEXT ROW" 3122 elif self._match_text_seq("TO", "FIRST"): 3123 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3124 elif self._match_text_seq("TO", "LAST"): 3125 text += f" TO LAST {self._advance_any().text}" # type: ignore 3126 after = exp.var(text) 3127 else: 3128 after = None 3129 3130 if self._match_text_seq("PATTERN"): 3131 self._match_l_paren() 3132 3133 if not self._curr: 3134 self.raise_error("Expecting )", self._curr) 3135 3136 paren = 1 3137 start = self._curr 3138 3139 while self._curr and paren > 0: 3140 if self._curr.token_type == TokenType.L_PAREN: 3141 paren += 1 3142 if self._curr.token_type == TokenType.R_PAREN: 3143 paren -= 1 3144 3145 end = self._prev 3146 self._advance() 3147 3148 if paren > 0: 3149 self.raise_error("Expecting )", self._curr) 3150 3151 pattern = exp.var(self._find_sql(start, end)) 3152 else: 3153 pattern = None 3154 3155 
        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY table expressions."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        # cross_apply: True = CROSS APPLY, False = OUTER APPLY, None = neither.
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: UNNEST, a function call, or a (possibly dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional join method / side / kind tokens, in that order."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse USING (...) column names, unwrapping bare Columns into Identifiers."""
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN clause (including comma joins and CROSS/OUTER APPLY)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type == TokenType.CROSS)
        ):
            # Nested joins: the ON/USING may belong to this join after inner joins.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of CREATE INDEX (USING, columns, INCLUDE, WHERE, ...)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index`/`anonymous` mean the name was already handled."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dot-separated component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table name into catalog/db/this parts."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # Shift parts: the last component is a database, not a table.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like expression: lateral, unnest, values, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()
        if version:
            this.set("version", version)

        # Some dialects (e.g. Hive) put the TABLESAMPLE before the alias.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node becomes the parent of the table it samples.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table versioning: FOR TIMESTAMP/VERSION AS OF, BETWEEN, etc."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a complete AT/BEFORE clause: put the tokens back.
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse Snowflake's ``CHANGES (INFORMATION => ...)`` clause."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse ``UNNEST (...) [alias] [WITH ORDINALITY | WITH OFFSET]``."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            # With ordinality, the last column alias names the offset column.
            if offset and len(expressions) < len(columns):
                offset = columns.pop()
3669 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3670 self._match(TokenType.ALIAS) 3671 offset = self._parse_id_var( 3672 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3673 ) or exp.to_identifier("offset") 3674 3675 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3676 3677 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3678 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3679 if not is_derived and not ( 3680 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3681 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3682 ): 3683 return None 3684 3685 expressions = self._parse_csv(self._parse_value) 3686 alias = self._parse_table_alias() 3687 3688 if is_derived: 3689 self._match_r_paren() 3690 3691 return self.expression( 3692 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3693 ) 3694 3695 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3696 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3697 as_modifier and self._match_text_seq("USING", "SAMPLE") 3698 ): 3699 return None 3700 3701 bucket_numerator = None 3702 bucket_denominator = None 3703 bucket_field = None 3704 percent = None 3705 size = None 3706 seed = None 3707 3708 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3709 matched_l_paren = self._match(TokenType.L_PAREN) 3710 3711 if self.TABLESAMPLE_CSV: 3712 num = None 3713 expressions = self._parse_csv(self._parse_primary) 3714 else: 3715 expressions = None 3716 num = ( 3717 self._parse_factor() 3718 if self._match(TokenType.NUMBER, advance=False) 3719 else self._parse_primary() or self._parse_placeholder() 3720 ) 3721 3722 if self._match_text_seq("BUCKET"): 3723 bucket_numerator = self._parse_number() 3724 self._match_text_seq("OUT", "OF") 3725 bucket_denominator = bucket_denominator = self._parse_number() 3726 
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        # Collect consecutive PIVOT/UNPIVOT clauses until one fails to parse.
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        # Lazily yield joins until _parse_join returns None.
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the ``IN (...)`` part of a PIVOT's FOR clause (values, aliases or ANY)."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order())
        else:
            aliased_expressions = self._parse_csv(_parse_aliased_expression)
            expr = self.expression(exp.In, this=value, expressions=aliased_expressions)

        self._match_r_paren()
        return expr

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a trailing ``PIVOT (...)`` / ``UNPIVOT (...)`` table operator."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # The PIVOT keyword was something else (e.g. an identifier): back out.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )
3848 self._match_r_paren() 3849 3850 pivot = self.expression( 3851 exp.Pivot, 3852 expressions=expressions, 3853 field=field, 3854 unpivot=unpivot, 3855 include_nulls=include_nulls, 3856 default_on_null=default_on_null, 3857 ) 3858 3859 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3860 pivot.set("alias", self._parse_table_alias()) 3861 3862 if not unpivot: 3863 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3864 3865 columns: t.List[exp.Expression] = [] 3866 for fld in pivot.args["field"].expressions: 3867 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3868 for name in names: 3869 if self.PREFIXED_PIVOT_COLUMNS: 3870 name = f"{name}_{field_name}" if name else field_name 3871 else: 3872 name = f"{field_name}_{name}" if name else field_name 3873 3874 columns.append(exp.to_identifier(name)) 3875 3876 pivot.set("columns", columns) 3877 3878 return pivot 3879 3880 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3881 return [agg.alias for agg in aggregations] 3882 3883 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3884 if not skip_where_token and not self._match(TokenType.PREWHERE): 3885 return None 3886 3887 return self.expression( 3888 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3889 ) 3890 3891 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3892 if not skip_where_token and not self._match(TokenType.WHERE): 3893 return None 3894 3895 return self.expression( 3896 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3897 ) 3898 3899 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3900 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3901 return None 3902 3903 elements: t.Dict[str, t.Any] = defaultdict(list) 3904 3905 if self._match(TokenType.ALL): 3906 elements["all"] = 
True 3907 elif self._match(TokenType.DISTINCT): 3908 elements["all"] = False 3909 3910 while True: 3911 expressions = self._parse_csv( 3912 lambda: None 3913 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3914 else self._parse_assignment() 3915 ) 3916 if expressions: 3917 elements["expressions"].extend(expressions) 3918 3919 grouping_sets = self._parse_grouping_sets() 3920 if grouping_sets: 3921 elements["grouping_sets"].extend(grouping_sets) 3922 3923 rollup = None 3924 cube = None 3925 totals = None 3926 3927 index = self._index 3928 with_ = self._match(TokenType.WITH) 3929 if self._match(TokenType.ROLLUP): 3930 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3931 elements["rollup"].extend(ensure_list(rollup)) 3932 3933 if self._match(TokenType.CUBE): 3934 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3935 elements["cube"].extend(ensure_list(cube)) 3936 3937 if self._match_text_seq("TOTALS"): 3938 totals = True 3939 elements["totals"] = True # type: ignore 3940 3941 if not (grouping_sets or rollup or cube or totals): 3942 if with_: 3943 self._retreat(index) 3944 break 3945 3946 return self.expression(exp.Group, **elements) # type: ignore 3947 3948 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3949 if not self._match(TokenType.GROUPING_SETS): 3950 return None 3951 3952 return self._parse_wrapped_csv(self._parse_grouping_set) 3953 3954 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3955 if self._match(TokenType.L_PAREN): 3956 grouping_set = self._parse_csv(self._parse_column) 3957 self._match_r_paren() 3958 return self.expression(exp.Tuple, expressions=grouping_set) 3959 3960 return self._parse_column() 3961 3962 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3963 if not skip_having_token and not self._match(TokenType.HAVING): 3964 return None 3965 return self.expression(exp.Having, this=self._parse_assignment()) 3966 3967 def 
_parse_qualify(self) -> t.Optional[exp.Qualify]: 3968 if not self._match(TokenType.QUALIFY): 3969 return None 3970 return self.expression(exp.Qualify, this=self._parse_assignment()) 3971 3972 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3973 if skip_start_token: 3974 start = None 3975 elif self._match(TokenType.START_WITH): 3976 start = self._parse_assignment() 3977 else: 3978 return None 3979 3980 self._match(TokenType.CONNECT_BY) 3981 nocycle = self._match_text_seq("NOCYCLE") 3982 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3983 exp.Prior, this=self._parse_bitwise() 3984 ) 3985 connect = self._parse_assignment() 3986 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3987 3988 if not start and self._match(TokenType.START_WITH): 3989 start = self._parse_assignment() 3990 3991 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3992 3993 def _parse_name_as_expression(self) -> exp.Alias: 3994 return self.expression( 3995 exp.Alias, 3996 alias=self._parse_id_var(any_token=True), 3997 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3998 ) 3999 4000 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4001 if self._match_text_seq("INTERPOLATE"): 4002 return self._parse_wrapped_csv(self._parse_name_as_expression) 4003 return None 4004 4005 def _parse_order( 4006 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4007 ) -> t.Optional[exp.Expression]: 4008 siblings = None 4009 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4010 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4011 return this 4012 4013 siblings = True 4014 4015 return self.expression( 4016 exp.Order, 4017 this=this, 4018 expressions=self._parse_csv(self._parse_ordered), 4019 siblings=siblings, 4020 ) 4021 4022 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4023 if not self._match(token): 4024 return None 
4025 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4026 4027 def _parse_ordered( 4028 self, parse_method: t.Optional[t.Callable] = None 4029 ) -> t.Optional[exp.Ordered]: 4030 this = parse_method() if parse_method else self._parse_assignment() 4031 if not this: 4032 return None 4033 4034 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4035 this = exp.var("ALL") 4036 4037 asc = self._match(TokenType.ASC) 4038 desc = self._match(TokenType.DESC) or (asc and False) 4039 4040 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4041 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4042 4043 nulls_first = is_nulls_first or False 4044 explicitly_null_ordered = is_nulls_first or is_nulls_last 4045 4046 if ( 4047 not explicitly_null_ordered 4048 and ( 4049 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4050 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4051 ) 4052 and self.dialect.NULL_ORDERING != "nulls_are_last" 4053 ): 4054 nulls_first = True 4055 4056 if self._match_text_seq("WITH", "FILL"): 4057 with_fill = self.expression( 4058 exp.WithFill, 4059 **{ # type: ignore 4060 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4061 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4062 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4063 "interpolate": self._parse_interpolate(), 4064 }, 4065 ) 4066 else: 4067 with_fill = None 4068 4069 return self.expression( 4070 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4071 ) 4072 4073 def _parse_limit( 4074 self, 4075 this: t.Optional[exp.Expression] = None, 4076 top: bool = False, 4077 skip_limit_token: bool = False, 4078 ) -> t.Optional[exp.Expression]: 4079 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4080 comments = self._prev_comments 4081 if top: 4082 limit_paren = self._match(TokenType.L_PAREN) 4083 expression = 
self._parse_term() if limit_paren else self._parse_number() 4084 4085 if limit_paren: 4086 self._match_r_paren() 4087 else: 4088 expression = self._parse_term() 4089 4090 if self._match(TokenType.COMMA): 4091 offset = expression 4092 expression = self._parse_term() 4093 else: 4094 offset = None 4095 4096 limit_exp = self.expression( 4097 exp.Limit, 4098 this=this, 4099 expression=expression, 4100 offset=offset, 4101 comments=comments, 4102 expressions=self._parse_limit_by(), 4103 ) 4104 4105 return limit_exp 4106 4107 if self._match(TokenType.FETCH): 4108 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4109 direction = self._prev.text.upper() if direction else "FIRST" 4110 4111 count = self._parse_field(tokens=self.FETCH_TOKENS) 4112 percent = self._match(TokenType.PERCENT) 4113 4114 self._match_set((TokenType.ROW, TokenType.ROWS)) 4115 4116 only = self._match_text_seq("ONLY") 4117 with_ties = self._match_text_seq("WITH", "TIES") 4118 4119 if only and with_ties: 4120 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4121 4122 return self.expression( 4123 exp.Fetch, 4124 direction=direction, 4125 count=count, 4126 percent=percent, 4127 with_ties=with_ties, 4128 ) 4129 4130 return this 4131 4132 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4133 if not self._match(TokenType.OFFSET): 4134 return this 4135 4136 count = self._parse_term() 4137 self._match_set((TokenType.ROW, TokenType.ROWS)) 4138 4139 return self.expression( 4140 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4141 ) 4142 4143 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4144 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4145 4146 def _parse_locks(self) -> t.List[exp.Lock]: 4147 locks = [] 4148 while True: 4149 if self._match_text_seq("FOR", "UPDATE"): 4150 update = True 4151 elif self._match_text_seq("FOR", "SHARE") or 
self._match_text_seq( 4152 "LOCK", "IN", "SHARE", "MODE" 4153 ): 4154 update = False 4155 else: 4156 break 4157 4158 expressions = None 4159 if self._match_text_seq("OF"): 4160 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4161 4162 wait: t.Optional[bool | exp.Expression] = None 4163 if self._match_text_seq("NOWAIT"): 4164 wait = True 4165 elif self._match_text_seq("WAIT"): 4166 wait = self._parse_primary() 4167 elif self._match_text_seq("SKIP", "LOCKED"): 4168 wait = False 4169 4170 locks.append( 4171 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4172 ) 4173 4174 return locks 4175 4176 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4177 while this and self._match_set(self.SET_OPERATIONS): 4178 token_type = self._prev.token_type 4179 4180 if token_type == TokenType.UNION: 4181 operation: t.Type[exp.SetOperation] = exp.Union 4182 elif token_type == TokenType.EXCEPT: 4183 operation = exp.Except 4184 else: 4185 operation = exp.Intersect 4186 4187 comments = self._prev.comments 4188 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4189 by_name = self._match_text_seq("BY", "NAME") 4190 expression = self._parse_select(nested=True, parse_set_operation=False) 4191 4192 this = self.expression( 4193 operation, 4194 comments=comments, 4195 this=this, 4196 distinct=distinct, 4197 by_name=by_name, 4198 expression=expression, 4199 ) 4200 4201 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4202 expression = this.expression 4203 4204 if expression: 4205 for arg in self.SET_OP_MODIFIERS: 4206 expr = expression.args.get(arg) 4207 if expr: 4208 this.set(arg, expr.pop()) 4209 4210 return this 4211 4212 def _parse_expression(self) -> t.Optional[exp.Expression]: 4213 return self._parse_alias(self._parse_assignment()) 4214 4215 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4216 this = self._parse_disjunction() 4217 
if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4218 # This allows us to parse <non-identifier token> := <expr> 4219 this = exp.column( 4220 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4221 ) 4222 4223 while self._match_set(self.ASSIGNMENT): 4224 this = self.expression( 4225 self.ASSIGNMENT[self._prev.token_type], 4226 this=this, 4227 comments=self._prev_comments, 4228 expression=self._parse_assignment(), 4229 ) 4230 4231 return this 4232 4233 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4234 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4235 4236 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4237 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4238 4239 def _parse_equality(self) -> t.Optional[exp.Expression]: 4240 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4241 4242 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4243 return self._parse_tokens(self._parse_range, self.COMPARISON) 4244 4245 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4246 this = this or self._parse_bitwise() 4247 negate = self._match(TokenType.NOT) 4248 4249 if self._match_set(self.RANGE_PARSERS): 4250 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4251 if not expression: 4252 return this 4253 4254 this = expression 4255 elif self._match(TokenType.ISNULL): 4256 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4257 4258 # Postgres supports ISNULL and NOTNULL for conditions. 
4259 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4260 if self._match(TokenType.NOTNULL): 4261 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4262 this = self.expression(exp.Not, this=this) 4263 4264 if negate: 4265 this = self._negate_range(this) 4266 4267 if self._match(TokenType.IS): 4268 this = self._parse_is(this) 4269 4270 return this 4271 4272 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4273 if not this: 4274 return this 4275 4276 return self.expression(exp.Not, this=this) 4277 4278 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4279 index = self._index - 1 4280 negate = self._match(TokenType.NOT) 4281 4282 if self._match_text_seq("DISTINCT", "FROM"): 4283 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4284 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4285 4286 expression = self._parse_null() or self._parse_boolean() 4287 if not expression: 4288 self._retreat(index) 4289 return None 4290 4291 this = self.expression(exp.Is, this=this, expression=expression) 4292 return self.expression(exp.Not, this=this) if negate else this 4293 4294 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4295 unnest = self._parse_unnest(with_alias=False) 4296 if unnest: 4297 this = self.expression(exp.In, this=this, unnest=unnest) 4298 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4299 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4300 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4301 4302 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4303 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4304 else: 4305 this = self.expression(exp.In, this=this, expressions=expressions) 4306 4307 if matched_l_paren: 4308 self._match_r_paren(this) 4309 elif not 
self._match(TokenType.R_BRACKET, expression=this): 4310 self.raise_error("Expecting ]") 4311 else: 4312 this = self.expression(exp.In, this=this, field=self._parse_field()) 4313 4314 return this 4315 4316 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4317 low = self._parse_bitwise() 4318 self._match(TokenType.AND) 4319 high = self._parse_bitwise() 4320 return self.expression(exp.Between, this=this, low=low, high=high) 4321 4322 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4323 if not self._match(TokenType.ESCAPE): 4324 return this 4325 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4326 4327 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4328 index = self._index 4329 4330 if not self._match(TokenType.INTERVAL) and match_interval: 4331 return None 4332 4333 if self._match(TokenType.STRING, advance=False): 4334 this = self._parse_primary() 4335 else: 4336 this = self._parse_term() 4337 4338 if not this or ( 4339 isinstance(this, exp.Column) 4340 and not this.table 4341 and not this.this.quoted 4342 and this.name.upper() == "IS" 4343 ): 4344 self._retreat(index) 4345 return None 4346 4347 unit = self._parse_function() or ( 4348 not self._match(TokenType.ALIAS, advance=False) 4349 and self._parse_var(any_token=True, upper=True) 4350 ) 4351 4352 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4353 # each INTERVAL expression into this canonical form so it's easy to transpile 4354 if this and this.is_number: 4355 this = exp.Literal.string(this.to_py()) 4356 elif this and this.is_string: 4357 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4358 if len(parts) == 1: 4359 if unit: 4360 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4361 self._retreat(self._index - 1) 4362 4363 this = exp.Literal.string(parts[0][0]) 4364 unit = self.expression(exp.Var, 
this=parts[0][1].upper()) 4365 4366 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4367 unit = self.expression( 4368 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4369 ) 4370 4371 interval = self.expression(exp.Interval, this=this, unit=unit) 4372 4373 index = self._index 4374 self._match(TokenType.PLUS) 4375 4376 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4377 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4378 return self.expression( 4379 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4380 ) 4381 4382 self._retreat(index) 4383 return interval 4384 4385 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4386 this = self._parse_term() 4387 4388 while True: 4389 if self._match_set(self.BITWISE): 4390 this = self.expression( 4391 self.BITWISE[self._prev.token_type], 4392 this=this, 4393 expression=self._parse_term(), 4394 ) 4395 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4396 this = self.expression( 4397 exp.DPipe, 4398 this=this, 4399 expression=self._parse_term(), 4400 safe=not self.dialect.STRICT_STRING_CONCAT, 4401 ) 4402 elif self._match(TokenType.DQMARK): 4403 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4404 elif self._match_pair(TokenType.LT, TokenType.LT): 4405 this = self.expression( 4406 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4407 ) 4408 elif self._match_pair(TokenType.GT, TokenType.GT): 4409 this = self.expression( 4410 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4411 ) 4412 else: 4413 break 4414 4415 return this 4416 4417 def _parse_term(self) -> t.Optional[exp.Expression]: 4418 this = self._parse_factor() 4419 4420 while self._match_set(self.TERM): 4421 klass = self.TERM[self._prev.token_type] 4422 comments = self._prev_comments 4423 expression = self._parse_factor() 4424 4425 this = 
self.expression(klass, this=this, comments=comments, expression=expression) 4426 4427 if isinstance(this, exp.Collate): 4428 expr = this.expression 4429 4430 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4431 # fallback to Identifier / Var 4432 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4433 ident = expr.this 4434 if isinstance(ident, exp.Identifier): 4435 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4436 4437 return this 4438 4439 def _parse_factor(self) -> t.Optional[exp.Expression]: 4440 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4441 this = parse_method() 4442 4443 while self._match_set(self.FACTOR): 4444 klass = self.FACTOR[self._prev.token_type] 4445 comments = self._prev_comments 4446 expression = parse_method() 4447 4448 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4449 self._retreat(self._index - 1) 4450 return this 4451 4452 this = self.expression(klass, this=this, comments=comments, expression=expression) 4453 4454 if isinstance(this, exp.Div): 4455 this.args["typed"] = self.dialect.TYPED_DIVISION 4456 this.args["safe"] = self.dialect.SAFE_DIVISION 4457 4458 return this 4459 4460 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4461 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4462 4463 def _parse_unary(self) -> t.Optional[exp.Expression]: 4464 if self._match_set(self.UNARY_PARSERS): 4465 return self.UNARY_PARSERS[self._prev.token_type](self) 4466 return self._parse_at_time_zone(self._parse_type()) 4467 4468 def _parse_type( 4469 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4470 ) -> t.Optional[exp.Expression]: 4471 interval = parse_interval and self._parse_interval() 4472 if interval: 4473 return interval 4474 4475 index = self._index 4476 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4477 4478 # parse_types() returns a Cast if we parsed 
BQ's inline constructor <type>(<values>) e.g. 4479 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4480 if isinstance(data_type, exp.Cast): 4481 # This constructor can contain ops directly after it, for instance struct unnesting: 4482 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4483 return self._parse_column_ops(data_type) 4484 4485 if data_type: 4486 index2 = self._index 4487 this = self._parse_primary() 4488 4489 if isinstance(this, exp.Literal): 4490 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4491 if parser: 4492 return parser(self, this, data_type) 4493 4494 return self.expression(exp.Cast, this=this, to=data_type) 4495 4496 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4497 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4498 # 4499 # If the index difference here is greater than 1, that means the parser itself must have 4500 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4501 # 4502 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4503 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4504 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4505 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4506 # 4507 # In these cases, we don't really want to return the converted type, but instead retreat 4508 # and try to parse a Column or Identifier in the section below. 
4509 if data_type.expressions and index2 - index > 1: 4510 self._retreat(index2) 4511 return self._parse_column_ops(data_type) 4512 4513 self._retreat(index) 4514 4515 if fallback_to_identifier: 4516 return self._parse_id_var() 4517 4518 this = self._parse_column() 4519 return this and self._parse_column_ops(this) 4520 4521 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4522 this = self._parse_type() 4523 if not this: 4524 return None 4525 4526 if isinstance(this, exp.Column) and not this.table: 4527 this = exp.var(this.name.upper()) 4528 4529 return self.expression( 4530 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4531 ) 4532 4533 def _parse_types( 4534 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4535 ) -> t.Optional[exp.Expression]: 4536 index = self._index 4537 4538 this: t.Optional[exp.Expression] = None 4539 prefix = self._match_text_seq("SYSUDTLIB", ".") 4540 4541 if not self._match_set(self.TYPE_TOKENS): 4542 identifier = allow_identifiers and self._parse_id_var( 4543 any_token=False, tokens=(TokenType.VAR,) 4544 ) 4545 if isinstance(identifier, exp.Identifier): 4546 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4547 4548 if len(tokens) != 1: 4549 self.raise_error("Unexpected identifier", self._prev) 4550 4551 if tokens[0].token_type in self.TYPE_TOKENS: 4552 self._prev = tokens[0] 4553 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4554 type_name = identifier.name 4555 4556 while self._match(TokenType.DOT): 4557 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4558 4559 this = exp.DataType.build(type_name, udt=True) 4560 else: 4561 self._retreat(self._index - 1) 4562 return None 4563 else: 4564 return None 4565 4566 type_token = self._prev.token_type 4567 4568 if type_token == TokenType.PSEUDO_TYPE: 4569 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4570 4571 if type_token == TokenType.OBJECT_IDENTIFIER: 4572 
return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4573 4574 # https://materialize.com/docs/sql/types/map/ 4575 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4576 key_type = self._parse_types( 4577 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4578 ) 4579 if not self._match(TokenType.FARROW): 4580 self._retreat(index) 4581 return None 4582 4583 value_type = self._parse_types( 4584 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4585 ) 4586 if not self._match(TokenType.R_BRACKET): 4587 self._retreat(index) 4588 return None 4589 4590 return exp.DataType( 4591 this=exp.DataType.Type.MAP, 4592 expressions=[key_type, value_type], 4593 nested=True, 4594 prefix=prefix, 4595 ) 4596 4597 nested = type_token in self.NESTED_TYPE_TOKENS 4598 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4599 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4600 expressions = None 4601 maybe_func = False 4602 4603 if self._match(TokenType.L_PAREN): 4604 if is_struct: 4605 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4606 elif nested: 4607 expressions = self._parse_csv( 4608 lambda: self._parse_types( 4609 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4610 ) 4611 ) 4612 elif type_token in self.ENUM_TYPE_TOKENS: 4613 expressions = self._parse_csv(self._parse_equality) 4614 elif is_aggregate: 4615 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4616 any_token=False, tokens=(TokenType.VAR,) 4617 ) 4618 if not func_or_ident or not self._match(TokenType.COMMA): 4619 return None 4620 expressions = self._parse_csv( 4621 lambda: self._parse_types( 4622 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4623 ) 4624 ) 4625 expressions.insert(0, func_or_ident) 4626 else: 4627 expressions = self._parse_csv(self._parse_type_size) 4628 4629 # 
https://docs.snowflake.com/en/sql-reference/data-types-vector 4630 if type_token == TokenType.VECTOR and len(expressions) == 2: 4631 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4632 4633 if not expressions or not self._match(TokenType.R_PAREN): 4634 self._retreat(index) 4635 return None 4636 4637 maybe_func = True 4638 4639 values: t.Optional[t.List[exp.Expression]] = None 4640 4641 if nested and self._match(TokenType.LT): 4642 if is_struct: 4643 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4644 else: 4645 expressions = self._parse_csv( 4646 lambda: self._parse_types( 4647 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4648 ) 4649 ) 4650 4651 if not self._match(TokenType.GT): 4652 self.raise_error("Expecting >") 4653 4654 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4655 values = self._parse_csv(self._parse_assignment) 4656 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4657 4658 if type_token in self.TIMESTAMPS: 4659 if self._match_text_seq("WITH", "TIME", "ZONE"): 4660 maybe_func = False 4661 tz_type = ( 4662 exp.DataType.Type.TIMETZ 4663 if type_token in self.TIMES 4664 else exp.DataType.Type.TIMESTAMPTZ 4665 ) 4666 this = exp.DataType(this=tz_type, expressions=expressions) 4667 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4668 maybe_func = False 4669 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4670 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4671 maybe_func = False 4672 elif type_token == TokenType.INTERVAL: 4673 unit = self._parse_var(upper=True) 4674 if unit: 4675 if self._match_text_seq("TO"): 4676 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4677 4678 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4679 else: 4680 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4681 4682 if 
maybe_func and check_func: 4683 index2 = self._index 4684 peek = self._parse_string() 4685 4686 if not peek: 4687 self._retreat(index) 4688 return None 4689 4690 self._retreat(index2) 4691 4692 if not this: 4693 if self._match_text_seq("UNSIGNED"): 4694 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4695 if not unsigned_type_token: 4696 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4697 4698 type_token = unsigned_type_token or type_token 4699 4700 this = exp.DataType( 4701 this=exp.DataType.Type[type_token.value], 4702 expressions=expressions, 4703 nested=nested, 4704 prefix=prefix, 4705 ) 4706 4707 # Empty arrays/structs are allowed 4708 if values is not None: 4709 cls = exp.Struct if is_struct else exp.Array 4710 this = exp.cast(cls(expressions=values), this, copy=False) 4711 4712 elif expressions: 4713 this.set("expressions", expressions) 4714 4715 # https://materialize.com/docs/sql/types/list/#type-name 4716 while self._match(TokenType.LIST): 4717 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4718 4719 index = self._index 4720 4721 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4722 matched_array = self._match(TokenType.ARRAY) 4723 4724 while self._curr: 4725 datatype_token = self._prev.token_type 4726 matched_l_bracket = self._match(TokenType.L_BRACKET) 4727 if not matched_l_bracket and not matched_array: 4728 break 4729 4730 matched_array = False 4731 values = self._parse_csv(self._parse_assignment) or None 4732 if ( 4733 values 4734 and not schema 4735 and ( 4736 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4737 ) 4738 ): 4739 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
                # in DuckDB ARRAY[1] should retreat and instead be parsed into exp.Array,
                # in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        # Give the dialect a chance to post-process the parsed type (e.g. type aliases)
        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single `name type`-style entry inside a STRUCT<...> type definition."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        # Optional `name: type` separator (e.g. DuckDB structs)
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # What we parsed was not a type after all; re-parse the whole entry as a type
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an AtTimeZone node if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference along with any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference (a field or, in some dialects, the VALUES keyword)."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks-style `col:path.to.field` VARIANT extraction into JSONExtract."""
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Keep the raw SQL text of the path segment
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this
    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the target type of a `::` cast."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply column operators (dots, `::` casts, brackets, ...) to a parsed column."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A new dot shifts the name parts: table -> db, db -> catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals, `.N` numbers, or a parenthesized
        expression / tuple / subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated into a single Concat node
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` style number literal
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier.

        The primary/function attempt order flips when `anonymous_func` is set.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC `{fn ...}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse an actual function invocation, dispatching to registered parsers.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: force an exp.Anonymous node instead of a typed function.
            optional_parens: allow paren-less functions (e.g. CURRENT_DATE).
            any_token: allow any non-reserved token as the function name.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling so it can be round-tripped
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key is an identifier, not a column reference
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a function/UDF definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and its optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL `_utf8'...'`) or fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda argument name."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> ...`), falling back to a plain
        select/expression argument if no lambda arrow follows."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse a regular argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single field definition inside a schema."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and constraint list after a column name, producing a ColumnDef."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse ALIAS/MATERIALIZED
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds if REFRESH does not follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | <expr>}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ALWAYS AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>)
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric args: (start [, increment])
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC, or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named (CONSTRAINT name ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint using the registered constraint parsers."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse the identifier naming a unique key."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with optional NULLS NOT DISTINCT / USING / ON CONFLICT."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key constraint options, e.g. ON DELETE CASCADE, as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options
    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; if `match` is False the keyword is assumed consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint with its REFERENCES clause and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one part of a composite primary key."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); rewinds if SYSTEM_TIME does not follow."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one entry inside brackets/braces: an assignment with optional alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` after `this`: struct literal, array literal, or subscript."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Handle chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:end` slice suffix after `this`."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `END` may have been consumed as an INTERVAL unit; undo that interpretation
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as a function call `IF(...)` or as `IF cond THEN ... [ELSE ...] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in some dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; rewinds on mismatch."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(part FROM expr) or the comma-separated variant."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse a GAP_FILL(TABLE t, ...) call."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of a CAST/TRY_CAST call, including FORMAT and CHARACTER SET clauses.

        Args:
            strict: build exp.Cast when True, exp.TryCast otherwise.
            safe: propagated to the resulting node's `safe` arg.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal casts with FORMAT become StrToDate/StrToTime with a translated format
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # User-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, including the WITHIN GROUP form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result]
... [, default]) 5736 5737 The second variant will always be parsed into a CASE expression. Note that NULL 5738 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5739 instead of relying on pattern matching. 5740 """ 5741 args = self._parse_csv(self._parse_assignment) 5742 5743 if len(args) < 3: 5744 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5745 5746 expression, *expressions = args 5747 if not expression: 5748 return None 5749 5750 ifs = [] 5751 for search, result in zip(expressions[::2], expressions[1::2]): 5752 if not search or not result: 5753 return None 5754 5755 if isinstance(search, exp.Literal): 5756 ifs.append( 5757 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5758 ) 5759 elif isinstance(search, exp.Null): 5760 ifs.append( 5761 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5762 ) 5763 else: 5764 cond = exp.or_( 5765 exp.EQ(this=expression.copy(), expression=search), 5766 exp.and_( 5767 exp.Is(this=expression.copy(), expression=exp.Null()), 5768 exp.Is(this=search.copy(), expression=exp.Null()), 5769 copy=False, 5770 ), 5771 copy=False, 5772 ) 5773 ifs.append(exp.If(this=cond, true=result)) 5774 5775 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5776 5777 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5778 self._match_text_seq("KEY") 5779 key = self._parse_column() 5780 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5781 self._match_text_seq("VALUE") 5782 value = self._parse_bitwise() 5783 5784 if not key and not value: 5785 return None 5786 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5787 5788 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5789 if not this or not self._match_text_seq("FORMAT", "JSON"): 5790 return this 5791 5792 return self.expression(exp.FormatJson, this=this) 5793 
5794 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5795 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5796 for value in values: 5797 if self._match_text_seq(value, "ON", on): 5798 return f"{value} ON {on}" 5799 5800 return None 5801 5802 @t.overload 5803 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5804 5805 @t.overload 5806 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5807 5808 def _parse_json_object(self, agg=False): 5809 star = self._parse_star() 5810 expressions = ( 5811 [star] 5812 if star 5813 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5814 ) 5815 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5816 5817 unique_keys = None 5818 if self._match_text_seq("WITH", "UNIQUE"): 5819 unique_keys = True 5820 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5821 unique_keys = False 5822 5823 self._match_text_seq("KEYS") 5824 5825 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5826 self._parse_type() 5827 ) 5828 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5829 5830 return self.expression( 5831 exp.JSONObjectAgg if agg else exp.JSONObject, 5832 expressions=expressions, 5833 null_handling=null_handling, 5834 unique_keys=unique_keys, 5835 return_type=return_type, 5836 encoding=encoding, 5837 ) 5838 5839 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5840 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5841 if not self._match_text_seq("NESTED"): 5842 this = self._parse_id_var() 5843 kind = self._parse_types(allow_identifiers=False) 5844 nested = None 5845 else: 5846 this = None 5847 kind = None 5848 nested = True 5849 5850 path = self._match_text_seq("PATH") and self._parse_string() 5851 nested_schema = nested and self._parse_json_schema() 5852 5853 return self.expression( 5854 exp.JSONColumnDef, 5855 this=this, 5856 kind=kind, 
5857 path=path, 5858 nested_schema=nested_schema, 5859 ) 5860 5861 def _parse_json_schema(self) -> exp.JSONSchema: 5862 self._match_text_seq("COLUMNS") 5863 return self.expression( 5864 exp.JSONSchema, 5865 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5866 ) 5867 5868 def _parse_json_table(self) -> exp.JSONTable: 5869 this = self._parse_format_json(self._parse_bitwise()) 5870 path = self._match(TokenType.COMMA) and self._parse_string() 5871 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5872 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5873 schema = self._parse_json_schema() 5874 5875 return exp.JSONTable( 5876 this=this, 5877 schema=schema, 5878 path=path, 5879 error_handling=error_handling, 5880 empty_handling=empty_handling, 5881 ) 5882 5883 def _parse_match_against(self) -> exp.MatchAgainst: 5884 expressions = self._parse_csv(self._parse_column) 5885 5886 self._match_text_seq(")", "AGAINST", "(") 5887 5888 this = self._parse_string() 5889 5890 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5891 modifier = "IN NATURAL LANGUAGE MODE" 5892 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5893 modifier = f"{modifier} WITH QUERY EXPANSION" 5894 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5895 modifier = "IN BOOLEAN MODE" 5896 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5897 modifier = "WITH QUERY EXPANSION" 5898 else: 5899 modifier = None 5900 5901 return self.expression( 5902 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5903 ) 5904 5905 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5906 def _parse_open_json(self) -> exp.OpenJSON: 5907 this = self._parse_bitwise() 5908 path = self._match(TokenType.COMMA) and self._parse_string() 5909 5910 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5911 this = self._parse_field(any_token=True) 5912 kind = 
self._parse_types() 5913 path = self._parse_string() 5914 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5915 5916 return self.expression( 5917 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5918 ) 5919 5920 expressions = None 5921 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5922 self._match_l_paren() 5923 expressions = self._parse_csv(_parse_open_json_column_def) 5924 5925 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5926 5927 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5928 args = self._parse_csv(self._parse_bitwise) 5929 5930 if self._match(TokenType.IN): 5931 return self.expression( 5932 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5933 ) 5934 5935 if haystack_first: 5936 haystack = seq_get(args, 0) 5937 needle = seq_get(args, 1) 5938 else: 5939 needle = seq_get(args, 0) 5940 haystack = seq_get(args, 1) 5941 5942 return self.expression( 5943 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5944 ) 5945 5946 def _parse_predict(self) -> exp.Predict: 5947 self._match_text_seq("MODEL") 5948 this = self._parse_table() 5949 5950 self._match(TokenType.COMMA) 5951 self._match_text_seq("TABLE") 5952 5953 return self.expression( 5954 exp.Predict, 5955 this=this, 5956 expression=self._parse_table(), 5957 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5958 ) 5959 5960 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5961 args = self._parse_csv(self._parse_table) 5962 return exp.JoinHint(this=func_name.upper(), expressions=args) 5963 5964 def _parse_substring(self) -> exp.Substring: 5965 # Postgres supports the form: substring(string [from int] [for int]) 5966 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5967 5968 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5969 5970 if self._match(TokenType.FROM): 5971 
args.append(self._parse_bitwise()) 5972 if self._match(TokenType.FOR): 5973 if len(args) == 1: 5974 args.append(exp.Literal.number(1)) 5975 args.append(self._parse_bitwise()) 5976 5977 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5978 5979 def _parse_trim(self) -> exp.Trim: 5980 # https://www.w3resource.com/sql/character-functions/trim.php 5981 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5982 5983 position = None 5984 collation = None 5985 expression = None 5986 5987 if self._match_texts(self.TRIM_TYPES): 5988 position = self._prev.text.upper() 5989 5990 this = self._parse_bitwise() 5991 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5992 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5993 expression = self._parse_bitwise() 5994 5995 if invert_order: 5996 this, expression = expression, this 5997 5998 if self._match(TokenType.COLLATE): 5999 collation = self._parse_bitwise() 6000 6001 return self.expression( 6002 exp.Trim, this=this, position=position, expression=expression, collation=collation 6003 ) 6004 6005 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6006 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6007 6008 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6009 return self._parse_window(self._parse_id_var(), alias=True) 6010 6011 def _parse_respect_or_ignore_nulls( 6012 self, this: t.Optional[exp.Expression] 6013 ) -> t.Optional[exp.Expression]: 6014 if self._match_text_seq("IGNORE", "NULLS"): 6015 return self.expression(exp.IgnoreNulls, this=this) 6016 if self._match_text_seq("RESPECT", "NULLS"): 6017 return self.expression(exp.RespectNulls, this=this) 6018 return this 6019 6020 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6021 if self._match(TokenType.HAVING): 6022 self._match_texts(("MAX", "MIN")) 6023 max = self._prev.text.upper() != "MIN" 6024 
return self.expression( 6025 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6026 ) 6027 6028 return this 6029 6030 def _parse_window( 6031 self, this: t.Optional[exp.Expression], alias: bool = False 6032 ) -> t.Optional[exp.Expression]: 6033 func = this 6034 comments = func.comments if isinstance(func, exp.Expression) else None 6035 6036 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6037 self._match(TokenType.WHERE) 6038 this = self.expression( 6039 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6040 ) 6041 self._match_r_paren() 6042 6043 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6044 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6045 if self._match_text_seq("WITHIN", "GROUP"): 6046 order = self._parse_wrapped(self._parse_order) 6047 this = self.expression(exp.WithinGroup, this=this, expression=order) 6048 6049 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6050 # Some dialects choose to implement and some do not. 6051 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6052 6053 # There is some code above in _parse_lambda that handles 6054 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6055 6056 # The below changes handle 6057 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
6058 6059 # Oracle allows both formats 6060 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6061 # and Snowflake chose to do the same for familiarity 6062 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6063 if isinstance(this, exp.AggFunc): 6064 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6065 6066 if ignore_respect and ignore_respect is not this: 6067 ignore_respect.replace(ignore_respect.this) 6068 this = self.expression(ignore_respect.__class__, this=this) 6069 6070 this = self._parse_respect_or_ignore_nulls(this) 6071 6072 # bigquery select from window x AS (partition by ...) 6073 if alias: 6074 over = None 6075 self._match(TokenType.ALIAS) 6076 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6077 return this 6078 else: 6079 over = self._prev.text.upper() 6080 6081 if comments and isinstance(func, exp.Expression): 6082 func.pop_comments() 6083 6084 if not self._match(TokenType.L_PAREN): 6085 return self.expression( 6086 exp.Window, 6087 comments=comments, 6088 this=this, 6089 alias=self._parse_id_var(False), 6090 over=over, 6091 ) 6092 6093 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6094 6095 first = self._match(TokenType.FIRST) 6096 if self._match_text_seq("LAST"): 6097 first = False 6098 6099 partition, order = self._parse_partition_and_order() 6100 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6101 6102 if kind: 6103 self._match(TokenType.BETWEEN) 6104 start = self._parse_window_spec() 6105 self._match(TokenType.AND) 6106 end = self._parse_window_spec() 6107 6108 spec = self.expression( 6109 exp.WindowSpec, 6110 kind=kind, 6111 start=start["value"], 6112 start_side=start["side"], 6113 end=end["value"], 6114 end_side=end["side"], 6115 ) 6116 else: 6117 spec = None 6118 6119 self._match_r_paren() 6120 6121 window = self.expression( 6122 exp.Window, 6123 comments=comments, 
6124 this=this, 6125 partition_by=partition, 6126 order=order, 6127 spec=spec, 6128 alias=window_alias, 6129 over=over, 6130 first=first, 6131 ) 6132 6133 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6134 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6135 return self._parse_window(window, alias=alias) 6136 6137 return window 6138 6139 def _parse_partition_and_order( 6140 self, 6141 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6142 return self._parse_partition_by(), self._parse_order() 6143 6144 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6145 self._match(TokenType.BETWEEN) 6146 6147 return { 6148 "value": ( 6149 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6150 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6151 or self._parse_bitwise() 6152 ), 6153 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6154 } 6155 6156 def _parse_alias( 6157 self, this: t.Optional[exp.Expression], explicit: bool = False 6158 ) -> t.Optional[exp.Expression]: 6159 any_token = self._match(TokenType.ALIAS) 6160 comments = self._prev_comments or [] 6161 6162 if explicit and not any_token: 6163 return this 6164 6165 if self._match(TokenType.L_PAREN): 6166 aliases = self.expression( 6167 exp.Aliases, 6168 comments=comments, 6169 this=this, 6170 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6171 ) 6172 self._match_r_paren(aliases) 6173 return aliases 6174 6175 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6176 self.STRING_ALIASES and self._parse_string_as_identifier() 6177 ) 6178 6179 if alias: 6180 comments.extend(alias.pop_comments()) 6181 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6182 column = this.this 6183 6184 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6185 if not this.comments and column and column.comments: 6186 this.comments = 
column.pop_comments() 6187 6188 return this 6189 6190 def _parse_id_var( 6191 self, 6192 any_token: bool = True, 6193 tokens: t.Optional[t.Collection[TokenType]] = None, 6194 ) -> t.Optional[exp.Expression]: 6195 expression = self._parse_identifier() 6196 if not expression and ( 6197 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6198 ): 6199 quoted = self._prev.token_type == TokenType.STRING 6200 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6201 6202 return expression 6203 6204 def _parse_string(self) -> t.Optional[exp.Expression]: 6205 if self._match_set(self.STRING_PARSERS): 6206 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6207 return self._parse_placeholder() 6208 6209 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6210 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6211 6212 def _parse_number(self) -> t.Optional[exp.Expression]: 6213 if self._match_set(self.NUMERIC_PARSERS): 6214 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6215 return self._parse_placeholder() 6216 6217 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6218 if self._match(TokenType.IDENTIFIER): 6219 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6220 return self._parse_placeholder() 6221 6222 def _parse_var( 6223 self, 6224 any_token: bool = False, 6225 tokens: t.Optional[t.Collection[TokenType]] = None, 6226 upper: bool = False, 6227 ) -> t.Optional[exp.Expression]: 6228 if ( 6229 (any_token and self._advance_any()) 6230 or self._match(TokenType.VAR) 6231 or (self._match_set(tokens) if tokens else False) 6232 ): 6233 return self.expression( 6234 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6235 ) 6236 return self._parse_placeholder() 6237 6238 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6239 if self._curr and 
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6240 self._advance() 6241 return self._prev 6242 return None 6243 6244 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6245 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6246 6247 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6248 return self._parse_primary() or self._parse_var(any_token=True) 6249 6250 def _parse_null(self) -> t.Optional[exp.Expression]: 6251 if self._match_set(self.NULL_TOKENS): 6252 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6253 return self._parse_placeholder() 6254 6255 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6256 if self._match(TokenType.TRUE): 6257 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6258 if self._match(TokenType.FALSE): 6259 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6260 return self._parse_placeholder() 6261 6262 def _parse_star(self) -> t.Optional[exp.Expression]: 6263 if self._match(TokenType.STAR): 6264 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6265 return self._parse_placeholder() 6266 6267 def _parse_parameter(self) -> exp.Parameter: 6268 this = self._parse_identifier() or self._parse_primary_or_var() 6269 return self.expression(exp.Parameter, this=this) 6270 6271 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6272 if self._match_set(self.PLACEHOLDER_PARSERS): 6273 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6274 if placeholder: 6275 return placeholder 6276 self._advance(-1) 6277 return None 6278 6279 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6280 if not self._match_texts(keywords): 6281 return None 6282 if self._match(TokenType.L_PAREN, advance=False): 6283 return self._parse_wrapped_csv(self._parse_expression) 6284 6285 expression = self._parse_expression() 6286 return [expression] if expression else None 
6287 6288 def _parse_csv( 6289 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6290 ) -> t.List[exp.Expression]: 6291 parse_result = parse_method() 6292 items = [parse_result] if parse_result is not None else [] 6293 6294 while self._match(sep): 6295 self._add_comments(parse_result) 6296 parse_result = parse_method() 6297 if parse_result is not None: 6298 items.append(parse_result) 6299 6300 return items 6301 6302 def _parse_tokens( 6303 self, parse_method: t.Callable, expressions: t.Dict 6304 ) -> t.Optional[exp.Expression]: 6305 this = parse_method() 6306 6307 while self._match_set(expressions): 6308 this = self.expression( 6309 expressions[self._prev.token_type], 6310 this=this, 6311 comments=self._prev_comments, 6312 expression=parse_method(), 6313 ) 6314 6315 return this 6316 6317 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6318 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6319 6320 def _parse_wrapped_csv( 6321 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6322 ) -> t.List[exp.Expression]: 6323 return self._parse_wrapped( 6324 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6325 ) 6326 6327 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6328 wrapped = self._match(TokenType.L_PAREN) 6329 if not wrapped and not optional: 6330 self.raise_error("Expecting (") 6331 parse_result = parse_method() 6332 if wrapped: 6333 self._match_r_paren() 6334 return parse_result 6335 6336 def _parse_expressions(self) -> t.List[exp.Expression]: 6337 return self._parse_csv(self._parse_expression) 6338 6339 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6340 return self._parse_select() or self._parse_set_operations( 6341 self._parse_expression() if alias else self._parse_assignment() 6342 ) 6343 6344 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6345 
return self._parse_query_modifiers( 6346 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6347 ) 6348 6349 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6350 this = None 6351 if self._match_texts(self.TRANSACTION_KIND): 6352 this = self._prev.text 6353 6354 self._match_texts(("TRANSACTION", "WORK")) 6355 6356 modes = [] 6357 while True: 6358 mode = [] 6359 while self._match(TokenType.VAR): 6360 mode.append(self._prev.text) 6361 6362 if mode: 6363 modes.append(" ".join(mode)) 6364 if not self._match(TokenType.COMMA): 6365 break 6366 6367 return self.expression(exp.Transaction, this=this, modes=modes) 6368 6369 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6370 chain = None 6371 savepoint = None 6372 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6373 6374 self._match_texts(("TRANSACTION", "WORK")) 6375 6376 if self._match_text_seq("TO"): 6377 self._match_text_seq("SAVEPOINT") 6378 savepoint = self._parse_id_var() 6379 6380 if self._match(TokenType.AND): 6381 chain = not self._match_text_seq("NO") 6382 self._match_text_seq("CHAIN") 6383 6384 if is_rollback: 6385 return self.expression(exp.Rollback, savepoint=savepoint) 6386 6387 return self.expression(exp.Commit, chain=chain) 6388 6389 def _parse_refresh(self) -> exp.Refresh: 6390 self._match(TokenType.TABLE) 6391 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6392 6393 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6394 if not self._match_text_seq("ADD"): 6395 return None 6396 6397 self._match(TokenType.COLUMN) 6398 exists_column = self._parse_exists(not_=True) 6399 expression = self._parse_field_def() 6400 6401 if expression: 6402 expression.set("exists", exists_column) 6403 6404 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6405 if self._match_texts(("FIRST", "AFTER")): 6406 position = self._prev.text 6407 column_position = 
self.expression( 6408 exp.ColumnPosition, this=self._parse_column(), position=position 6409 ) 6410 expression.set("position", column_position) 6411 6412 return expression 6413 6414 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6415 drop = self._match(TokenType.DROP) and self._parse_drop() 6416 if drop and not isinstance(drop, exp.Command): 6417 drop.set("kind", drop.args.get("kind", "COLUMN")) 6418 return drop 6419 6420 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6421 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6422 return self.expression( 6423 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6424 ) 6425 6426 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6427 index = self._index - 1 6428 6429 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6430 return self._parse_csv( 6431 lambda: self.expression( 6432 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6433 ) 6434 ) 6435 6436 self._retreat(index) 6437 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6438 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6439 6440 if self._match_text_seq("ADD", "COLUMNS"): 6441 schema = self._parse_schema() 6442 if schema: 6443 return [schema] 6444 return [] 6445 6446 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6447 6448 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6449 if self._match_texts(self.ALTER_ALTER_PARSERS): 6450 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6451 6452 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6453 # keyword after ALTER we default to parsing this statement 6454 self._match(TokenType.COLUMN) 6455 column = self._parse_field(any_token=True) 6456 6457 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6458 return 
self.expression(exp.AlterColumn, this=column, drop=True) 6459 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6460 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6461 if self._match(TokenType.COMMENT): 6462 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6463 if self._match_text_seq("DROP", "NOT", "NULL"): 6464 return self.expression( 6465 exp.AlterColumn, 6466 this=column, 6467 drop=True, 6468 allow_null=True, 6469 ) 6470 if self._match_text_seq("SET", "NOT", "NULL"): 6471 return self.expression( 6472 exp.AlterColumn, 6473 this=column, 6474 allow_null=False, 6475 ) 6476 self._match_text_seq("SET", "DATA") 6477 self._match_text_seq("TYPE") 6478 return self.expression( 6479 exp.AlterColumn, 6480 this=column, 6481 dtype=self._parse_types(), 6482 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6483 using=self._match(TokenType.USING) and self._parse_assignment(), 6484 ) 6485 6486 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6487 if self._match_texts(("ALL", "EVEN", "AUTO")): 6488 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6489 6490 self._match_text_seq("KEY", "DISTKEY") 6491 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6492 6493 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6494 if compound: 6495 self._match_text_seq("SORTKEY") 6496 6497 if self._match(TokenType.L_PAREN, advance=False): 6498 return self.expression( 6499 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6500 ) 6501 6502 self._match_texts(("AUTO", "NONE")) 6503 return self.expression( 6504 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6505 ) 6506 6507 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6508 index = self._index - 1 6509 6510 partition_exists = self._parse_exists() 6511 if self._match(TokenType.PARTITION, 
advance=False): 6512 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6513 6514 self._retreat(index) 6515 return self._parse_csv(self._parse_drop_column) 6516 6517 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6518 if self._match(TokenType.COLUMN): 6519 exists = self._parse_exists() 6520 old_column = self._parse_column() 6521 to = self._match_text_seq("TO") 6522 new_column = self._parse_column() 6523 6524 if old_column is None or to is None or new_column is None: 6525 return None 6526 6527 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6528 6529 self._match_text_seq("TO") 6530 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6531 6532 def _parse_alter_table_set(self) -> exp.AlterSet: 6533 alter_set = self.expression(exp.AlterSet) 6534 6535 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6536 "TABLE", "PROPERTIES" 6537 ): 6538 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6539 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6540 alter_set.set("expressions", [self._parse_assignment()]) 6541 elif self._match_texts(("LOGGED", "UNLOGGED")): 6542 alter_set.set("option", exp.var(self._prev.text.upper())) 6543 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6544 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6545 elif self._match_text_seq("LOCATION"): 6546 alter_set.set("location", self._parse_field()) 6547 elif self._match_text_seq("ACCESS", "METHOD"): 6548 alter_set.set("access_method", self._parse_field()) 6549 elif self._match_text_seq("TABLESPACE"): 6550 alter_set.set("tablespace", self._parse_field()) 6551 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6552 alter_set.set("file_format", [self._parse_field()]) 6553 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6554 
alter_set.set("file_format", self._parse_wrapped_options()) 6555 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6556 alter_set.set("copy_options", self._parse_wrapped_options()) 6557 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6558 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6559 else: 6560 if self._match_text_seq("SERDE"): 6561 alter_set.set("serde", self._parse_field()) 6562 6563 alter_set.set("expressions", [self._parse_properties()]) 6564 6565 return alter_set 6566 6567 def _parse_alter(self) -> exp.Alter | exp.Command: 6568 start = self._prev 6569 6570 alter_token = self._match_set(self.ALTERABLES) and self._prev 6571 if not alter_token: 6572 return self._parse_as_command(start) 6573 6574 exists = self._parse_exists() 6575 only = self._match_text_seq("ONLY") 6576 this = self._parse_table(schema=True) 6577 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6578 6579 if self._next: 6580 self._advance() 6581 6582 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6583 if parser: 6584 actions = ensure_list(parser(self)) 6585 options = self._parse_csv(self._parse_property) 6586 6587 if not self._curr and actions: 6588 return self.expression( 6589 exp.Alter, 6590 this=this, 6591 kind=alter_token.text.upper(), 6592 exists=exists, 6593 actions=actions, 6594 only=only, 6595 options=options, 6596 cluster=cluster, 6597 ) 6598 6599 return self._parse_as_command(start) 6600 6601 def _parse_merge(self) -> exp.Merge: 6602 self._match(TokenType.INTO) 6603 target = self._parse_table() 6604 6605 if target and self._match(TokenType.ALIAS, advance=False): 6606 target.set("alias", self._parse_table_alias()) 6607 6608 self._match(TokenType.USING) 6609 using = self._parse_table() 6610 6611 self._match(TokenType.ON) 6612 on = self._parse_assignment() 6613 6614 return self.expression( 6615 exp.Merge, 6616 this=target, 6617 using=using, 6618 on=on, 6619 
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] THEN ... clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None when neither is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * form.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols...) VALUES (...) form.
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * form.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # UPDATE SET a = b, ... form.
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS; fall back to a raw exp.Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form ``<name> [= | TO] <value>``.

        Returns an exp.SetItem wrapping an exp.EQ, or None (after rewinding)
        when no assignment could be parsed.
        """
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        # Bare identifiers on the right-hand side are normalized into variables.
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL | SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring a registered SET_PARSERS entry."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a raw command when tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by ``options``.

        ``options`` maps a leading keyword to the keyword sequences that may
        follow it; the matched words are joined into a single exp.Var.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; `continuations is None` means the leading
            # keyword itself was unknown.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement and wrap it in an opaque exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # The first token's text becomes the command name; the rest is the raw payload.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property ``<this>(<kind>[(<key> <value> ...)])``."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once neither a key nor a value could be read.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse ``(MIN <min> MAX <max>)`` or ``(MAX <max>)``; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail ``<expr> IN <iterator> [IF <condition>]``.

        Returns None (after rewinding) when no IN keyword follows, i.e. this is
        not a comprehension.
        """
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewind one token further than our own start -- also gives back the
            # token consumed before this method was entered.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string such as ``$$ ... $$`` or ``$tag$ ... $tag$``."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The tag (or closing "$") must be adjacent to the opening "$".
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # A named tag must be immediately followed by the closing "$".
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the full opening tag sequence repeats.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser whose (possibly multi-word) key matches upcoming tokens.

        Walks ``trie`` token by token; rewinds and returns None when no key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Return True when the current token has the given type, else None.
        # Comments attached to the matched token are moved onto `expression`.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type contained in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive tokens with the given types.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a "(" token, raising a parse error when it is missing.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a ")" token, raising a parse error when it is missing.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Match when the current token's upper-cased text is in `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match a sequence of tokens by upper-cased text, rewinding on failure
        # (and also on success when advance=False).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in ``node`` that refer to lambda parameters.

        ``expressions`` are the lambda's parameters; a column whose first part
        names a parameter is replaced with the bare identifier (or dot path),
        cast when the parameter carries a target type.
        """
        if not node:
            return node

        # Parameter name -> cast target; False when the parameter has no type.
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot that wraps this column, if any;
                # otherwise replace the column (or the whole node) directly.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE | DATABASE] ...; may instead yield a TRUNCATE()
        function call or fall back to a raw exp.Command."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered opclass expression, optionally followed by WITH <operator>."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse an optionally ``=``-prefixed, parenthesized list of properties."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement into exp.CopyParameter nodes."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse credential-related COPY clauses (storage integration,
        credentials, encryption, IAM role, region) into exp.Credentials."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Thin wrapper around _parse_field -- presumably a dialect override point.
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY [INTO] statement; fall back to a raw command when tokens remain."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM (load) and False for COPY ... TO (unload).
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- dialect: The dialect used to parse the input expressions; resolved via Dialect.get_or_raise, so None selects the default dialect.
1316 def __init__( 1317 self, 1318 error_level: t.Optional[ErrorLevel] = None, 1319 error_message_context: int = 100, 1320 max_errors: int = 3, 1321 dialect: DialectType = None, 1322 ): 1323 from sqlglot.dialects import Dialect 1324 1325 self.error_level = error_level or ErrorLevel.IMMEDIATE 1326 self.error_message_context = error_message_context 1327 self.max_errors = max_errors 1328 self.dialect = Dialect.get_or_raise(dialect) 1329 self.reset()
1341 def parse( 1342 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1343 ) -> t.List[t.Optional[exp.Expression]]: 1344 """ 1345 Parses a list of tokens and returns a list of syntax trees, one tree 1346 per parsed SQL statement. 1347 1348 Args: 1349 raw_tokens: The list of tokens. 1350 sql: The original SQL string, used to produce helpful debug messages. 1351 1352 Returns: 1353 The list of the produced syntax trees. 1354 """ 1355 return self._parse( 1356 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1357 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1359 def parse_into( 1360 self, 1361 expression_types: exp.IntoType, 1362 raw_tokens: t.List[Token], 1363 sql: t.Optional[str] = None, 1364 ) -> t.List[t.Optional[exp.Expression]]: 1365 """ 1366 Parses a list of tokens into a given Expression type. If a collection of Expression 1367 types is given instead, this method will try to parse the token list into each one 1368 of them, stopping at the first for which the parsing succeeds. 1369 1370 Args: 1371 expression_types: The expression type(s) to try and parse the token list into. 1372 raw_tokens: The list of tokens. 1373 sql: The original SQL string, used to produce helpful debug messages. 1374 1375 Returns: 1376 The target Expression. 1377 """ 1378 errors = [] 1379 for expression_type in ensure_list(expression_types): 1380 parser = self.EXPRESSION_PARSERS.get(expression_type) 1381 if not parser: 1382 raise TypeError(f"No parser registered for {expression_type}") 1383 1384 try: 1385 return self._parse(parser, raw_tokens, sql) 1386 except ParseError as e: 1387 e.errors[0]["into_expression"] = expression_type 1388 errors.append(e) 1389 1390 raise ParseError( 1391 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1392 errors=merge_errors(errors), 1393 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1433 def check_errors(self) -> None: 1434 """Logs or raises any found errors, depending on the chosen error level setting.""" 1435 if self.error_level == ErrorLevel.WARN: 1436 for error in self.errors: 1437 logger.error(str(error)) 1438 elif self.error_level == ErrorLevel.RAISE and self.errors: 1439 raise ParseError( 1440 concat_messages(self.errors, self.max_errors), 1441 errors=merge_errors(self.errors), 1442 )
Logs or raises any found errors, depending on the chosen error level setting.
1444 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1445 """ 1446 Appends an error in the list of recorded errors or raises it, depending on the chosen 1447 error level setting. 1448 """ 1449 token = token or self._curr or self._prev or Token.string("") 1450 start = token.start 1451 end = token.end + 1 1452 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1453 highlight = self.sql[start:end] 1454 end_context = self.sql[end : end + self.error_message_context] 1455 1456 error = ParseError.new( 1457 f"{message}. Line {token.line}, Col: {token.col}.\n" 1458 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1459 description=message, 1460 line=token.line, 1461 col=token.col, 1462 start_context=start_context, 1463 highlight=highlight, 1464 end_context=end_context, 1465 ) 1466 1467 if self.error_level == ErrorLevel.IMMEDIATE: 1468 raise error 1469 1470 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1472 def expression( 1473 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1474 ) -> E: 1475 """ 1476 Creates a new, validated Expression. 1477 1478 Args: 1479 exp_class: The expression class to instantiate. 1480 comments: An optional list of comments to attach to the expression. 1481 kwargs: The arguments to set for the expression along with their respective values. 1482 1483 Returns: 1484 The target expression. 1485 """ 1486 instance = exp_class(**kwargs) 1487 instance.add_comments(comments) if comments else self._add_comments(instance) 1488 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1495 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1496 """ 1497 Validates an Expression, making sure that all its mandatory arguments are set. 1498 1499 Args: 1500 expression: The expression to validate. 1501 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1502 1503 Returns: 1504 The validated expression. 1505 """ 1506 if self.error_level != ErrorLevel.IGNORE: 1507 for error_message in expression.error_messages(args): 1508 self.raise_error(error_message) 1509 1510 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.