sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder

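# Illustrative note (not part of the original module): the builders above are
# registered in Parser.FUNCTIONS and called with the parsed argument list (and,
# for dialect-aware builders, the active dialect). For example, the JSON path
# builder normalizes the second argument via dialect.to_json_path:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sqlglot.parse_one("SELECT JSON_EXTRACT(x, '$.a')").find(exp.JSONExtract)
#
# so dialects that spell JSON paths differently still produce the same
# exp.JSONExtract node.
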
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass

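# Illustrative sketch of the tries the metaclass builds: multi-word keys from
# SHOW_PARSERS / SET_PARSERS are split into word sequences so the parser can do
# incremental lookahead word by word:
#
#     >>> from sqlglot.trie import new_trie, in_trie, TrieResult
#     >>> trie = new_trie([("SHOW", "TABLES")])
#     >>> in_trie(trie, ("SHOW",))[0] is TrieResult.PREFIX
#     True
#     >>> in_trie(trie, ("SHOW", "TABLES"))[0] is TrieResult.EXISTS
#     True
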
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

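    # Illustrative sketch (a hypothetical dialect, not part of sqlglot): dialect
    # parsers customize function parsing by extending FUNCTIONS in a subclass,
    # keeping the base registry intact:
    #
    #     class MyDialectParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "NVL2": lambda args: exp.Nvl2(
    #                 this=seq_get(args, 0),
    #                 true=seq_get(args, 1),
    #                 false=seq_get(args, 2),
    #             ),
    #         }
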
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

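    # Illustrative note: because many keywords appear in ID_VAR_TOKENS, they can
    # be reused as plain identifiers wherever the grammar allows it, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 AS offset").sql()
    #     'SELECT 1 AS offset'
    #
    # TokenType.UNION is removed above since accepting it as an identifier would
    # make set operations ambiguous.
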
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

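    # Illustrative note: the operator tables above (EQUALITY, COMPARISON,
    # BITWISE, TERM, FACTOR, ...) drive a conventional precedence-climbing
    # parse, each level delegating to the next-tighter one, so
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("a + b * c")
    #
    # yields Add(this=a, expression=Mul(this=b, expression=c)), i.e. FACTOR
    # operators bind tighter than TERM operators.
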
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

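    # Illustrative note: EXPRESSION_PARSERS backs Parser.parse_into (defined
    # below), which lets callers target a specific node type instead of a full
    # statement:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("x > 1 AND y > 2", into=exp.Condition)
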
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

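    # Illustrative note: PLACEHOLDER_PARSERS covers both anonymous `?`
    # parameters and named `:param` placeholders:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT * FROM t WHERE id = :id").find(exp.Placeholder)
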
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

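    # Illustrative note: range parsers receive the already-parsed left-hand side
    # and produce predicate nodes; binary_range_parser (defined at module level)
    # covers the simple binary cases:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("x BETWEEN 1 AND 10")  # -> exp.Between
    #     >>> sqlglot.parse_one("x LIKE 'a%'")         # -> exp.Like
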
self: self._parse_dict_range(this="LIFETIME"), 952 "LIKE": lambda self: self._parse_create_like(), 953 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 954 "LOCK": lambda self: self._parse_locking(), 955 "LOCKING": lambda self: self._parse_locking(), 956 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 957 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 958 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 959 "MODIFIES": lambda self: self._parse_modifies_property(), 960 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 961 "NO": lambda self: self._parse_no_property(), 962 "ON": lambda self: self._parse_on_property(), 963 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 964 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 965 "PARTITION": lambda self: self._parse_partitioned_of(), 966 "PARTITION BY": lambda self: self._parse_partitioned_by(), 967 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 968 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 969 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 970 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 971 "READS": lambda self: self._parse_reads_property(), 972 "REMOTE": lambda self: self._parse_remote_with_connection(), 973 "RETURNS": lambda self: self._parse_returns(), 974 "STRICT": lambda self: self.expression(exp.StrictProperty), 975 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 976 "ROW": lambda self: self._parse_row(), 977 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 978 "SAMPLE": lambda self: self.expression( 979 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 980 ), 981 "SECURE": lambda self: self.expression(exp.SecureProperty), 982 "SECURITY": lambda self: self._parse_security(), 983 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 984 "SETTINGS": lambda self: self._parse_settings_property(), 985 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 986 "SORTKEY": lambda self: self._parse_sortkey(), 987 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 988 "STABLE": lambda self: self.expression( 989 exp.StabilityProperty, this=exp.Literal.string("STABLE") 990 ), 991 "STORED": lambda self: self._parse_stored(), 992 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 993 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 994 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 995 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 996 "TO": lambda self: self._parse_to_table(), 997 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 998 "TRANSFORM": lambda self: self.expression( 999 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1000 ), 1001 "TTL": lambda self: self._parse_ttl(), 1002 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1003 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1004 "VOLATILE": lambda self: self._parse_volatile_property(), 1005 "WITH": lambda self: self._parse_with_property(), 1006 } 1007 1008 CONSTRAINT_PARSERS = { 1009 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1010 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1011 "CASESPECIFIC": lambda self: 
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

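    # Illustrative note: column constraints are dispatched by keyword via
    # CONSTRAINT_PARSERS, so a definition such as
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL CHECK (x > 0))")
    #
    # attaches NotNullColumnConstraint and CheckColumnConstraint nodes to the
    # column's ColumnDef.
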
"DROP": lambda self: self._parse_alter_table_drop(), 1086 "RENAME": lambda self: self._parse_alter_table_rename(), 1087 "SET": lambda self: self._parse_alter_table_set(), 1088 "SWAP": lambda self: self.expression( 1089 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1090 ), 1091 } 1092 1093 ALTER_ALTER_PARSERS = { 1094 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1095 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1096 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1097 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1098 } 1099 1100 SCHEMA_UNNAMED_CONSTRAINTS = { 1101 "CHECK", 1102 "EXCLUDE", 1103 "FOREIGN KEY", 1104 "LIKE", 1105 "PERIOD", 1106 "PRIMARY KEY", 1107 "UNIQUE", 1108 "WATERMARK", 1109 } 1110 1111 NO_PAREN_FUNCTION_PARSERS = { 1112 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1113 "CASE": lambda self: self._parse_case(), 1114 "CONNECT_BY_ROOT": lambda self: self.expression( 1115 exp.ConnectByRoot, this=self._parse_column() 1116 ), 1117 "IF": lambda self: self._parse_if(), 1118 } 1119 1120 INVALID_FUNC_NAME_TOKENS = { 1121 TokenType.IDENTIFIER, 1122 TokenType.STRING, 1123 } 1124 1125 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1126 1127 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1128 1129 FUNCTION_PARSERS = { 1130 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1131 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1132 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1133 "DECODE": lambda self: self._parse_decode(), 1134 "EXTRACT": lambda self: self._parse_extract(), 1135 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1136 "GAP_FILL": lambda self: self._parse_gap_fill(), 1137 "JSON_OBJECT": lambda self: self._parse_json_object(), 1138 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1139 "JSON_TABLE": lambda self: self._parse_json_table(), 1140 "MATCH": lambda self: self._parse_match_against(), 1141 "NORMALIZE": lambda self: self._parse_normalize(), 1142 "OPENJSON": lambda self: self._parse_open_json(), 1143 "OVERLAY": lambda self: self._parse_overlay(), 1144 "POSITION": lambda self: self._parse_position(), 1145 "PREDICT": lambda self: self._parse_predict(), 1146 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1147 "STRING_AGG": lambda self: self._parse_string_agg(), 1148 "SUBSTRING": lambda self: self._parse_substring(), 1149 "TRIM": lambda self: self._parse_trim(), 1150 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1151 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1152 "XMLELEMENT": lambda self: self.expression( 1153 exp.XMLElement, 1154 this=self._match_text_seq("NAME") and self._parse_id_var(), 1155 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1156 ), 1157 "XMLTABLE": lambda self: self._parse_xml_table(), 1158 } 1159 1160 QUERY_MODIFIER_PARSERS = { 1161 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1162 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1163 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1164 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1165 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1166 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1167 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1168 TokenType.ORDER_BY: lambda self: 
("order", self._parse_order()), 1169 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1170 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1171 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1172 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1173 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1174 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1175 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1176 TokenType.CLUSTER_BY: lambda self: ( 1177 "cluster", 1178 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1179 ), 1180 TokenType.DISTRIBUTE_BY: lambda self: ( 1181 "distribute", 1182 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1183 ), 1184 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1185 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1186 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1187 } 1188 1189 SET_PARSERS = { 1190 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1191 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1192 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1193 "TRANSACTION": lambda self: self._parse_set_transaction(), 1194 } 1195 1196 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1197 1198 TYPE_LITERAL_PARSERS = { 1199 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1200 } 1201 1202 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1203 1204 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1205 1206 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1207 1208 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1209 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1210 "ISOLATION": ( 1211 ("LEVEL", "REPEATABLE", "READ"), 1212 ("LEVEL", "READ", "COMMITTED"), 1213 ("LEVEL", "READ", "UNCOMITTED"), 1214 ("LEVEL", "SERIALIZABLE"), 1215 ), 1216 "READ": ("WRITE", "ONLY"), 1217 } 1218 1219 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1220 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1221 ) 1222 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1223 1224 CREATE_SEQUENCE: OPTIONS_TYPE = { 1225 "SCALE": ("EXTEND", "NOEXTEND"), 1226 "SHARD": ("EXTEND", "NOEXTEND"), 1227 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1228 **dict.fromkeys( 1229 ( 1230 "SESSION", 1231 "GLOBAL", 1232 "KEEP", 1233 "NOKEEP", 1234 "ORDER", 1235 "NOORDER", 1236 "NOCACHE", 1237 "CYCLE", 1238 "NOCYCLE", 1239 "NOMINVALUE", 1240 "NOMAXVALUE", 1241 "NOSCALE", 1242 "NOSHARD", 1243 ), 1244 tuple(), 1245 ), 1246 } 1247 1248 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1249 1250 USABLES: OPTIONS_TYPE = dict.fromkeys( 1251 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1252 ) 1253 1254 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1255 1256 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1257 "TYPE": ("EVOLUTION",), 1258 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1259 } 1260 1261 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1262 1263 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1264 1265 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1266 "NOT": ("ENFORCED",), 1267 "MATCH": ( 1268 "FULL", 1269 "PARTIAL", 
1270 "SIMPLE", 1271 ), 1272 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1273 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1274 } 1275 1276 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1277 1278 CLONE_KEYWORDS = {"CLONE", "COPY"} 1279 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1280 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1281 1282 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1283 1284 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1285 1286 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1287 1288 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1289 1290 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1291 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1292 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1293 1294 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1295 1296 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1297 1298 ADD_CONSTRAINT_TOKENS = { 1299 TokenType.CONSTRAINT, 1300 TokenType.FOREIGN_KEY, 1301 TokenType.INDEX, 1302 TokenType.KEY, 1303 TokenType.PRIMARY_KEY, 1304 TokenType.UNIQUE, 1305 } 1306 1307 DISTINCT_TOKENS = {TokenType.DISTINCT} 1308 1309 NULL_TOKENS = {TokenType.NULL} 1310 1311 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1312 1313 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1314 1315 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1316 1317 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1318 1319 ODBC_DATETIME_LITERALS = { 1320 "d": exp.Date, 1321 "t": exp.Time, 1322 "ts": exp.Timestamp, 1323 } 1324 1325 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1326 1327 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1328 1329 # The style options for the DESCRIBE statement 1330 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1331 1332 # The style options for the ANALYZE statement 1333 ANALYZE_STYLES = { 1334 "BUFFER_USAGE_LIMIT", 1335 "FULL", 1336 "LOCAL", 1337 "NO_WRITE_TO_BINLOG", 1338 "SAMPLE", 1339 "SKIP_LOCKED", 1340 "VERBOSE", 1341 } 1342 1343 ANALYZE_EXPRESSION_PARSERS = { 1344 "ALL": lambda self: self._parse_analyze_columns(), 1345 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1346 "DELETE": lambda self: self._parse_analyze_delete(), 1347 "DROP": lambda self: self._parse_analyze_histogram(), 1348 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1349 "LIST": lambda self: self._parse_analyze_list(), 1350 "PREDICATE": lambda self: self._parse_analyze_columns(), 1351 "UPDATE": lambda self: self._parse_analyze_histogram(), 1352 "VALIDATE": lambda self: self._parse_analyze_validate(), 1353 } 1354 1355 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1356 1357 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1358 1359 OPERATION_MODIFIERS: t.Set[str] = set() 1360 1361 STRICT_CAST = True 1362 1363 PREFIXED_PIVOT_COLUMNS = False 1364 IDENTIFY_PIVOT_STRINGS = False 1365 1366 LOG_DEFAULTS_TO_LN = False 1367 1368 # Whether ADD is present for each column added by ALTER TABLE 1369 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1370 1371 # Whether the table sample clause expects CSV syntax 1372 TABLESAMPLE_CSV = False 1373 1374 # The default method used for table sampling 1375 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1376 1377 # Whether the SET command needs a 
delimiter (e.g. "=") for assignments 1378 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1379 1380 # Whether the TRIM function expects the characters to trim as its first argument 1381 TRIM_PATTERN_FIRST = False 1382 1383 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1384 STRING_ALIASES = False 1385 1386 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1387 MODIFIERS_ATTACHED_TO_SET_OP = True 1388 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1389 1390 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1391 NO_PAREN_IF_COMMANDS = True 1392 1393 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1394 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1395 1396 # Whether the `:` operator is used to extract a value from a VARIANT column 1397 COLON_IS_VARIANT_EXTRACT = False 1398 1399 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1400 # If this is True and '(' is not found, the keyword will be treated as an identifier 1401 VALUES_FOLLOWED_BY_PAREN = True 1402 1403 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1404 SUPPORTS_IMPLICIT_UNNEST = False 1405 1406 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1407 INTERVAL_SPANS = True 1408 1409 # Whether a PARTITION clause can follow a table reference 1410 SUPPORTS_PARTITION_SELECTION = False 1411 1412 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1413 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1414 1415 # Whether the 'AS' keyword is optional in the CTE definition syntax 1416 OPTIONAL_ALIAS_TOKEN_CTE = False 1417 1418 __slots__ = ( 1419 "error_level", 1420 "error_message_context", 1421 "max_errors", 1422 "dialect", 1423 "sql", 1424 "errors", 1425 "_tokens", 1426 "_index", 1427 "_curr", 1428 "_next", 1429 "_prev", 1430 "_prev_comments", 1431 ) 1432 1433 # Autofilled 1434 SHOW_TRIE: t.Dict = {} 1435 SET_TRIE: t.Dict = {} 1436 1437 def __init__( 1438 self, 1439 error_level: t.Optional[ErrorLevel] = None, 1440 error_message_context: int = 100, 1441 max_errors: int = 3, 1442 dialect: DialectType = None, 1443 ): 1444 from sqlglot.dialects import Dialect 1445 1446 self.error_level = error_level or ErrorLevel.IMMEDIATE 1447 self.error_message_context = error_message_context 1448 self.max_errors = max_errors 1449 self.dialect = Dialect.get_or_raise(dialect) 1450 self.reset() 1451 1452 def reset(self): 1453 self.sql = "" 1454 self.errors = [] 1455 self._tokens = [] 1456 self._index = 0 1457 self._curr = None 1458 self._next = None 1459 self._prev = None 1460 self._prev_comments = None 1461 1462 def parse( 1463 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1464 ) -> t.List[t.Optional[exp.Expression]]: 1465 """ 1466 Parses a list of tokens and returns a list of syntax trees, one tree 1467 per parsed SQL statement. 1468 1469 Args: 1470 raw_tokens: The list of tokens. 1471 sql: The original SQL string, used to produce helpful debug messages. 1472 1473 Returns: 1474 The list of the produced syntax trees. 1475 """ 1476 return self._parse( 1477 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1478 ) 1479 1480 def parse_into( 1481 self, 1482 expression_types: exp.IntoType, 1483 raw_tokens: t.List[Token], 1484 sql: t.Optional[str] = None, 1485 ) -> t.List[t.Optional[exp.Expression]]: 1486 """ 1487 Parses a list of tokens into a given Expression type. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

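    # Illustrative note: with the default ErrorLevel.IMMEDIATE, raise_error
    # throws on the first failure; with ErrorLevel.RAISE, errors accumulate in
    # self.errors and surface together via check_errors. Parser options can be
    # passed through the top-level helpers, roughly:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import ErrorLevel
    #     >>> sqlglot.parse_one("SELECT 1 +", error_level=ErrorLevel.RAISE)
    #     Traceback (most recent call last):
    #         ...
    #     sqlglot.errors.ParseError: ...
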
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

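    # Illustrative usage sketch (hypothetical call site): optional constructs
    # are often parsed speculatively through _try_parse below, which snapshots
    # the token index and error level and restores them if the attempt fails:
    #
    #     values = self._try_parse(self._parse_derived_table_values)
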
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
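
    # A minimal sketch of how the entry points above are typically driven; the
    # SQL strings are illustrative, and exp.Select is assumed to be registered
    # in EXPRESSION_PARSERS (parse_into raises a TypeError otherwise):
    #
    #     from sqlglot import exp
    #     from sqlglot.errors import ErrorLevel
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "SELECT a FROM t; SELECT b FROM u"
    #     tokens = Tokenizer().tokenize(sql)
    #
    #     # _parse() chunks the token list on semicolons, so parse() returns
    #     # one expression per statement here.
    #     expressions = Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql)
    #
    #     # parse_into() tries each candidate type in turn, raising a merged
    #     # ParseError if every attempt fails.
    #     select = Parser().parse_into(
    #         exp.Select, Tokenizer().tokenize("SELECT 1"), "SELECT 1"
    #     )[0]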

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
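            # Properties are legal in several slots of a CREATE statement (the
            # exp.Properties.Location comments throughout mark each one), and
            # extend_props folds every batch that parses into the single
            # Properties node accumulated so far.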
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
1994 exists=exists, 1995 properties=properties, 1996 indexes=indexes, 1997 no_schema_binding=no_schema_binding, 1998 begin=begin, 1999 end=end, 2000 clone=clone, 2001 concurrently=concurrently, 2002 clustered=clustered, 2003 ) 2004 2005 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2006 seq = exp.SequenceProperties() 2007 2008 options = [] 2009 index = self._index 2010 2011 while self._curr: 2012 self._match(TokenType.COMMA) 2013 if self._match_text_seq("INCREMENT"): 2014 self._match_text_seq("BY") 2015 self._match_text_seq("=") 2016 seq.set("increment", self._parse_term()) 2017 elif self._match_text_seq("MINVALUE"): 2018 seq.set("minvalue", self._parse_term()) 2019 elif self._match_text_seq("MAXVALUE"): 2020 seq.set("maxvalue", self._parse_term()) 2021 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2022 self._match_text_seq("=") 2023 seq.set("start", self._parse_term()) 2024 elif self._match_text_seq("CACHE"): 2025 # T-SQL allows empty CACHE which is initialized dynamically 2026 seq.set("cache", self._parse_number() or True) 2027 elif self._match_text_seq("OWNED", "BY"): 2028 # "OWNED BY NONE" is the default 2029 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2030 else: 2031 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2032 if opt: 2033 options.append(opt) 2034 else: 2035 break 2036 2037 seq.set("options", options if options else None) 2038 return None if self._index == index else seq 2039 2040 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2041 # only used for teradata currently 2042 self._match(TokenType.COMMA) 2043 2044 kwargs = { 2045 "no": self._match_text_seq("NO"), 2046 "dual": self._match_text_seq("DUAL"), 2047 "before": self._match_text_seq("BEFORE"), 2048 "default": self._match_text_seq("DEFAULT"), 2049 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2050 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2051 "after": self._match_text_seq("AFTER"), 2052 "minimum": self._match_texts(("MIN", "MINIMUM")), 2053 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2054 } 2055 2056 if self._match_texts(self.PROPERTY_PARSERS): 2057 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2058 try: 2059 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2060 except TypeError: 2061 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2062 2063 return None 2064 2065 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2066 return self._parse_wrapped_csv(self._parse_property) 2067 2068 def _parse_property(self) -> t.Optional[exp.Expression]: 2069 if self._match_texts(self.PROPERTY_PARSERS): 2070 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2071 2072 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2073 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2074 2075 if self._match_text_seq("COMPOUND", "SORTKEY"): 2076 return self._parse_sortkey(compound=True) 2077 2078 if self._match_text_seq("SQL", "SECURITY"): 2079 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2080 2081 index = self._index 2082 key = self._parse_column() 2083 2084 if not self._match(TokenType.EQ): 2085 self._retreat(index) 2086 return self._parse_sequence_properties() 2087 2088 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2089 if isinstance(key, exp.Column): 2090 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2091 2092 value = self._parse_bitwise() or self._parse_var(any_token=True) 2093 2094 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2095 if isinstance(value, exp.Column): 2096 value = exp.var(value.name) 2097 2098 return self.expression(exp.Property, this=key, value=value) 2099 2100 def _parse_stored(self) -> exp.FileFormatProperty: 2101 self._match(TokenType.ALIAS) 2102 2103 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2104 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2105 2106 return self.expression( 2107 exp.FileFormatProperty, 2108 this=( 2109 self.expression( 2110 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2111 ) 2112 if input_format or output_format 2113 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2114 ), 2115 ) 2116 2117 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2118 field = self._parse_field() 2119 if isinstance(field, exp.Identifier) and not field.quoted: 2120 field = exp.var(field) 2121 2122 return field 2123 2124 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2125 self._match(TokenType.EQ) 2126 self._match(TokenType.ALIAS) 2127 2128 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2129 2130 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2131 properties = [] 2132 while True: 2133 if before: 2134 prop = self._parse_property_before() 2135 else: 2136 prop = self._parse_property() 2137 if not prop: 2138 break 2139 for p in ensure_list(prop): 2140 properties.append(p) 2141 2142 if properties: 2143 return self.expression(exp.Properties, expressions=properties) 2144 2145 return None 2146 2147 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2148 return self.expression( 2149 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2150 ) 2151 2152 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2153 if self._match_texts(("DEFINER", "INVOKER")): 2154 security_specifier = self._prev.text.upper() 2155 return self.expression(exp.SecurityProperty, this=security_specifier) 2156 return None 2157 2158 def _parse_settings_property(self) -> exp.SettingsProperty: 2159 return self.expression( 2160 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2161 ) 2162 2163 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2164 if self._index >= 2: 2165 pre_volatile_token = self._tokens[self._index - 2] 2166 else: 2167 pre_volatile_token = None 2168 2169 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2170 return exp.VolatileProperty() 2171 2172 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2173 2174 def _parse_retention_period(self) -> exp.Var: 2175 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2176 number = self._parse_number() 2177 number_str = f"{number} " if number else "" 2178 unit = self._parse_var(any_token=True) 2179 return exp.var(f"{number_str}{unit}") 2180 2181 def _parse_system_versioning_property( 2182 self, with_: bool = False 2183 ) -> exp.WithSystemVersioningProperty: 2184 self._match(TokenType.EQ) 2185 prop = self.expression( 2186 exp.WithSystemVersioningProperty, 2187 **{ # type: ignore 2188 "on": 
True, 2189 "with": with_, 2190 }, 2191 ) 2192 2193 if self._match_text_seq("OFF"): 2194 prop.set("on", False) 2195 return prop 2196 2197 self._match(TokenType.ON) 2198 if self._match(TokenType.L_PAREN): 2199 while self._curr and not self._match(TokenType.R_PAREN): 2200 if self._match_text_seq("HISTORY_TABLE", "="): 2201 prop.set("this", self._parse_table_parts()) 2202 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2203 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2204 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2205 prop.set("retention_period", self._parse_retention_period()) 2206 2207 self._match(TokenType.COMMA) 2208 2209 return prop 2210 2211 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2212 self._match(TokenType.EQ) 2213 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2214 prop = self.expression(exp.DataDeletionProperty, on=on) 2215 2216 if self._match(TokenType.L_PAREN): 2217 while self._curr and not self._match(TokenType.R_PAREN): 2218 if self._match_text_seq("FILTER_COLUMN", "="): 2219 prop.set("filter_column", self._parse_column()) 2220 elif self._match_text_seq("RETENTION_PERIOD", "="): 2221 prop.set("retention_period", self._parse_retention_period()) 2222 2223 self._match(TokenType.COMMA) 2224 2225 return prop 2226 2227 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2228 kind = "HASH" 2229 expressions: t.Optional[t.List[exp.Expression]] = None 2230 if self._match_text_seq("BY", "HASH"): 2231 expressions = self._parse_wrapped_csv(self._parse_id_var) 2232 elif self._match_text_seq("BY", "RANDOM"): 2233 kind = "RANDOM" 2234 2235 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2236 buckets: t.Optional[exp.Expression] = None 2237 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2238 buckets = self._parse_number() 2239 2240 return self.expression( 2241 exp.DistributedByProperty, 2242 expressions=expressions, 2243 kind=kind, 2244 buckets=buckets, 2245 order=self._parse_order(), 2246 ) 2247 2248 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2249 self._match_text_seq("KEY") 2250 expressions = self._parse_wrapped_id_vars() 2251 return self.expression(expr_type, expressions=expressions) 2252 2253 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2254 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2255 prop = self._parse_system_versioning_property(with_=True) 2256 self._match_r_paren() 2257 return prop 2258 2259 if self._match(TokenType.L_PAREN, advance=False): 2260 return self._parse_wrapped_properties() 2261 2262 if self._match_text_seq("JOURNAL"): 2263 return self._parse_withjournaltable() 2264 2265 if self._match_texts(self.VIEW_ATTRIBUTES): 2266 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2267 2268 if self._match_text_seq("DATA"): 2269 return self._parse_withdata(no=False) 2270 elif self._match_text_seq("NO", "DATA"): 2271 return self._parse_withdata(no=True) 2272 2273 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2274 return self._parse_serde_properties(with_=True) 2275 2276 if self._match(TokenType.SCHEMA): 2277 return self.expression( 2278 exp.WithSchemaBindingProperty, 2279 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2280 ) 2281 2282 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2283 return self.expression( 2284 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2285 ) 2286 2287 if not self._next: 2288 return None 2289 2290 return self._parse_withisolatedloading() 2291 2292 def _parse_procedure_option(self) -> exp.Expression | None: 2293 if self._match_text_seq("EXECUTE", "AS"): 2294 return self.expression( 2295 exp.ExecuteAsProperty, 2296 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2297 or self._parse_string(), 2298 ) 2299 2300 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2301 2302 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2303 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2304 self._match(TokenType.EQ) 2305 2306 user = self._parse_id_var() 2307 self._match(TokenType.PARAMETER) 2308 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2309 2310 if not user or not host: 2311 return None 2312 2313 return exp.DefinerProperty(this=f"{user}@{host}") 2314 2315 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2316 self._match(TokenType.TABLE) 2317 self._match(TokenType.EQ) 2318 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2319 2320 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2321 return self.expression(exp.LogProperty, no=no) 2322 2323 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2324 return self.expression(exp.JournalProperty, **kwargs) 2325 2326 def _parse_checksum(self) -> exp.ChecksumProperty: 2327 self._match(TokenType.EQ) 2328 2329 on = None 2330 if self._match(TokenType.ON): 2331 on = True 2332 elif self._match_text_seq("OFF"): 2333 on = False 2334 2335 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2336 2337 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2338 return self.expression( 2339 exp.Cluster, 2340 expressions=( 2341 self._parse_wrapped_csv(self._parse_ordered) 2342 if wrapped 2343 else self._parse_csv(self._parse_ordered) 2344 ), 2345 ) 2346 2347 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2348 self._match_text_seq("BY") 2349 2350 self._match_l_paren() 2351 expressions = self._parse_csv(self._parse_column) 2352 self._match_r_paren() 2353 2354 if self._match_text_seq("SORTED", "BY"): 2355 self._match_l_paren() 2356 sorted_by = self._parse_csv(self._parse_ordered) 2357 self._match_r_paren() 2358 else: 2359 sorted_by = None 2360 2361 self._match(TokenType.INTO) 2362 buckets = self._parse_number() 2363 self._match_text_seq("BUCKETS") 2364 2365 return self.expression( 2366 exp.ClusteredByProperty, 2367 expressions=expressions, 2368 sorted_by=sorted_by, 2369 buckets=buckets, 2370 ) 2371 2372 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2373 if not self._match_text_seq("GRANTS"): 2374 self._retreat(self._index - 1) 2375 return None 2376 2377 return self.expression(exp.CopyGrantsProperty) 2378 2379 def _parse_freespace(self) -> exp.FreespaceProperty: 2380 self._match(TokenType.EQ) 2381 return self.expression( 2382 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2383 ) 2384 2385 def _parse_mergeblockratio( 2386 self, no: bool = False, default: bool = False 2387 ) -> exp.MergeBlockRatioProperty: 2388 if self._match(TokenType.EQ): 2389 return self.expression( 2390 exp.MergeBlockRatioProperty, 2391 this=self._parse_number(), 2392 percent=self._match(TokenType.PERCENT), 2393 ) 2394 2395 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2396 2397 def _parse_datablocksize( 2398 self, 2399 default: t.Optional[bool] = None, 2400 minimum: t.Optional[bool] = None, 2401 maximum: t.Optional[bool] = None, 2402 ) -> exp.DataBlocksizeProperty: 2403 self._match(TokenType.EQ) 2404 size = self._parse_number() 2405 2406 units = None 2407 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2408 units = self._prev.text 2409 2410 return self.expression( 2411 exp.DataBlocksizeProperty, 2412 size=size, 2413 units=units, 2414 default=default, 2415 minimum=minimum, 2416 maximum=maximum, 2417 ) 2418 2419 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2420 self._match(TokenType.EQ) 2421 always = self._match_text_seq("ALWAYS") 2422 manual = self._match_text_seq("MANUAL") 2423 never = self._match_text_seq("NEVER") 2424 default = self._match_text_seq("DEFAULT") 2425 2426 autotemp = None 2427 if self._match_text_seq("AUTOTEMP"): 2428 autotemp = self._parse_schema() 2429 2430 return self.expression( 2431 exp.BlockCompressionProperty, 2432 always=always, 2433 manual=manual, 2434 never=never, 2435 default=default, 2436 autotemp=autotemp, 2437 ) 2438 2439 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2440 index = self._index 2441 no = self._match_text_seq("NO") 2442 concurrent = self._match_text_seq("CONCURRENT") 2443 2444 if not self._match_text_seq("ISOLATED", "LOADING"): 2445 self._retreat(index) 2446 return None 2447 2448 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2449 return self.expression( 2450 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2451 ) 2452 2453 def _parse_locking(self) -> exp.LockingProperty: 2454 if self._match(TokenType.TABLE): 2455 kind = "TABLE" 2456 elif self._match(TokenType.VIEW): 2457 kind = "VIEW" 2458 elif self._match(TokenType.ROW): 2459 kind = "ROW" 2460 elif self._match_text_seq("DATABASE"): 2461 kind = "DATABASE" 2462 else: 2463 kind = None 2464 2465 if kind in ("DATABASE", "TABLE", "VIEW"): 2466 this = self._parse_table_parts() 2467 else: 2468 this = None 2469 2470 if self._match(TokenType.FOR): 2471 for_or_in = "FOR" 2472 elif self._match(TokenType.IN): 2473 for_or_in = "IN" 2474 else: 2475 for_or_in = None 2476 2477 if self._match_text_seq("ACCESS"): 2478 lock_type = "ACCESS" 2479 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2480 lock_type = "EXCLUSIVE" 2481 elif self._match_text_seq("SHARE"): 2482 lock_type = "SHARE" 2483 elif self._match_text_seq("READ"): 2484 lock_type = "READ" 2485 elif self._match_text_seq("WRITE"): 2486 lock_type = "WRITE" 2487 elif self._match_text_seq("CHECKSUM"): 2488 lock_type = "CHECKSUM" 2489 else: 2490 lock_type = None 2491 2492 override = self._match_text_seq("OVERRIDE") 2493 2494 return self.expression( 2495 exp.LockingProperty, 2496 this=this, 2497 kind=kind, 2498 for_or_in=for_or_in, 2499 lock_type=lock_type, 2500 override=override, 2501 ) 2502 2503 def _parse_partition_by(self) -> t.List[exp.Expression]: 2504 if self._match(TokenType.PARTITION_BY): 2505 return self._parse_csv(self._parse_assignment) 2506 return [] 2507 2508 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2509 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2510 if self._match_text_seq("MINVALUE"): 2511 return exp.var("MINVALUE") 2512 if self._match_text_seq("MAXVALUE"): 2513 return exp.var("MAXVALUE") 2514 return self._parse_bitwise() 2515 2516 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2517 expression = None 
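        # The branches below cover the three Postgres partition bound forms, e.g.:
        #   FOR VALUES IN (1, 2)
        #   FOR VALUES FROM (MINVALUE) TO (10)
        #   FOR VALUES WITH (MODULUS 4, REMAINDER 0)
        # ("FOR VALUES" itself is consumed by the caller, _parse_partitioned_of.)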
2518 from_expressions = None 2519 to_expressions = None 2520 2521 if self._match(TokenType.IN): 2522 this = self._parse_wrapped_csv(self._parse_bitwise) 2523 elif self._match(TokenType.FROM): 2524 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2525 self._match_text_seq("TO") 2526 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2527 elif self._match_text_seq("WITH", "(", "MODULUS"): 2528 this = self._parse_number() 2529 self._match_text_seq(",", "REMAINDER") 2530 expression = self._parse_number() 2531 self._match_r_paren() 2532 else: 2533 self.raise_error("Failed to parse partition bound spec.") 2534 2535 return self.expression( 2536 exp.PartitionBoundSpec, 2537 this=this, 2538 expression=expression, 2539 from_expressions=from_expressions, 2540 to_expressions=to_expressions, 2541 ) 2542 2543 # https://www.postgresql.org/docs/current/sql-createtable.html 2544 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2545 if not self._match_text_seq("OF"): 2546 self._retreat(self._index - 1) 2547 return None 2548 2549 this = self._parse_table(schema=True) 2550 2551 if self._match(TokenType.DEFAULT): 2552 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2553 elif self._match_text_seq("FOR", "VALUES"): 2554 expression = self._parse_partition_bound_spec() 2555 else: 2556 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2557 2558 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2559 2560 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2561 self._match(TokenType.EQ) 2562 return self.expression( 2563 exp.PartitionedByProperty, 2564 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2565 ) 2566 2567 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2568 if self._match_text_seq("AND", "STATISTICS"): 2569 statistics = True 2570 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2571 statistics = False 2572 else: 2573 statistics = None 2574 2575 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2576 2577 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2578 if self._match_text_seq("SQL"): 2579 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2580 return None 2581 2582 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2583 if self._match_text_seq("SQL", "DATA"): 2584 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2585 return None 2586 2587 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2588 if self._match_text_seq("PRIMARY", "INDEX"): 2589 return exp.NoPrimaryIndexProperty() 2590 if self._match_text_seq("SQL"): 2591 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2592 return None 2593 2594 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2595 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2596 return exp.OnCommitProperty() 2597 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2598 return exp.OnCommitProperty(delete=True) 2599 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2600 2601 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2602 if self._match_text_seq("SQL", "DATA"): 2603 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2604 return None 2605 2606 def _parse_distkey(self) -> exp.DistKeyProperty: 2607 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2608 2609 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2610 table = self._parse_table(schema=True) 2611 2612 options = [] 2613 while self._match_texts(("INCLUDING", "EXCLUDING")): 2614 this = self._prev.text.upper() 2615 2616 id_var = self._parse_id_var() 2617 if not id_var: 2618 return None 2619 2620 options.append( 2621 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2622 ) 2623 2624 return self.expression(exp.LikeProperty, this=table, expressions=options) 2625 2626 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2627 return self.expression( 2628 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2629 ) 2630 2631 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2632 self._match(TokenType.EQ) 2633 return self.expression( 2634 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2635 ) 2636 2637 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2638 self._match_text_seq("WITH", "CONNECTION") 2639 return self.expression( 2640 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2641 ) 2642 2643 def _parse_returns(self) -> exp.ReturnsProperty: 2644 value: t.Optional[exp.Expression] 2645 null = None 2646 is_table = self._match(TokenType.TABLE) 2647 2648 if is_table: 2649 if self._match(TokenType.LT): 2650 value = self.expression( 2651 exp.Schema, 2652 this="TABLE", 2653 expressions=self._parse_csv(self._parse_struct_types), 2654 ) 2655 if not self._match(TokenType.GT): 2656 self.raise_error("Expecting >") 2657 else: 2658 value = self._parse_schema(exp.var("TABLE")) 2659 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2660 null = True 2661 value = None 2662 else: 2663 value = self._parse_types() 2664 2665 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2666 2667 def _parse_describe(self) -> exp.Describe: 2668 kind = self._match_set(self.CREATABLES) and self._prev.text 2669 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2670 if self._match(TokenType.DOT): 2671 style = None 2672 self._retreat(self._index - 2) 2673 2674 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2675 2676 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2677 this = self._parse_statement() 2678 else: 2679 this = self._parse_table(schema=True) 2680 2681 properties = self._parse_properties() 2682 expressions = properties.expressions if properties else None 2683 partition = self._parse_partition() 2684 return self.expression( 2685 exp.Describe, 2686 this=this, 2687 style=style, 2688 kind=kind, 2689 expressions=expressions, 2690 partition=partition, 2691 format=format, 2692 ) 2693 2694 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2695 kind = self._prev.text.upper() 2696 expressions = [] 2697 2698 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2699 if self._match(TokenType.WHEN): 2700 expression = self._parse_disjunction() 2701 self._match(TokenType.THEN) 2702 else: 2703 expression = None 2704 2705 else_ = self._match(TokenType.ELSE) 2706 2707 if not self._match(TokenType.INTO): 2708 return None 2709 2710 return self.expression( 2711 exp.ConditionalInsert, 2712 this=self.expression( 2713 exp.Insert, 2714 this=self._parse_table(schema=True), 2715 
expression=self._parse_derived_table_values(), 2716 ), 2717 expression=expression, 2718 else_=else_, 2719 ) 2720 2721 expression = parse_conditional_insert() 2722 while expression is not None: 2723 expressions.append(expression) 2724 expression = parse_conditional_insert() 2725 2726 return self.expression( 2727 exp.MultitableInserts, 2728 kind=kind, 2729 comments=comments, 2730 expressions=expressions, 2731 source=self._parse_table(), 2732 ) 2733 2734 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2735 comments = [] 2736 hint = self._parse_hint() 2737 overwrite = self._match(TokenType.OVERWRITE) 2738 ignore = self._match(TokenType.IGNORE) 2739 local = self._match_text_seq("LOCAL") 2740 alternative = None 2741 is_function = None 2742 2743 if self._match_text_seq("DIRECTORY"): 2744 this: t.Optional[exp.Expression] = self.expression( 2745 exp.Directory, 2746 this=self._parse_var_or_string(), 2747 local=local, 2748 row_format=self._parse_row_format(match_row=True), 2749 ) 2750 else: 2751 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2752 comments += ensure_list(self._prev_comments) 2753 return self._parse_multitable_inserts(comments) 2754 2755 if self._match(TokenType.OR): 2756 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2757 2758 self._match(TokenType.INTO) 2759 comments += ensure_list(self._prev_comments) 2760 self._match(TokenType.TABLE) 2761 is_function = self._match(TokenType.FUNCTION) 2762 2763 this = ( 2764 self._parse_table(schema=True, parse_partition=True) 2765 if not is_function 2766 else self._parse_function() 2767 ) 2768 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2769 this.set("alias", self._parse_table_alias()) 2770 2771 returning = self._parse_returning() 2772 2773 return self.expression( 2774 exp.Insert, 2775 comments=comments, 2776 hint=hint, 2777 is_function=is_function, 2778 this=this, 2779 stored=self._match_text_seq("STORED") and self._parse_stored(), 2780 by_name=self._match_text_seq("BY", "NAME"), 2781 exists=self._parse_exists(), 2782 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2783 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2784 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2785 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2786 conflict=self._parse_on_conflict(), 2787 returning=returning or self._parse_returning(), 2788 overwrite=overwrite, 2789 alternative=alternative, 2790 ignore=ignore, 2791 source=self._match(TokenType.TABLE) and self._parse_table(), 2792 ) 2793 2794 def _parse_kill(self) -> exp.Kill: 2795 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2796 2797 return self.expression( 2798 exp.Kill, 2799 this=self._parse_primary(), 2800 kind=kind, 2801 ) 2802 2803 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2804 conflict = self._match_text_seq("ON", "CONFLICT") 2805 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2806 2807 if not conflict and not duplicate: 2808 return None 2809 2810 conflict_keys = None 2811 constraint = None 2812 2813 if conflict: 2814 if self._match_text_seq("ON", "CONSTRAINT"): 2815 constraint = self._parse_id_var() 2816 elif self._match(TokenType.L_PAREN): 2817 conflict_keys = self._parse_csv(self._parse_id_var) 2818 self._match_r_paren() 2819 2820 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2821 if 
self._prev.token_type == TokenType.UPDATE: 2822 self._match(TokenType.SET) 2823 expressions = self._parse_csv(self._parse_equality) 2824 else: 2825 expressions = None 2826 2827 return self.expression( 2828 exp.OnConflict, 2829 duplicate=duplicate, 2830 expressions=expressions, 2831 action=action, 2832 conflict_keys=conflict_keys, 2833 constraint=constraint, 2834 where=self._parse_where(), 2835 ) 2836 2837 def _parse_returning(self) -> t.Optional[exp.Returning]: 2838 if not self._match(TokenType.RETURNING): 2839 return None 2840 return self.expression( 2841 exp.Returning, 2842 expressions=self._parse_csv(self._parse_expression), 2843 into=self._match(TokenType.INTO) and self._parse_table_part(), 2844 ) 2845 2846 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2847 if not self._match(TokenType.FORMAT): 2848 return None 2849 return self._parse_row_format() 2850 2851 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2852 index = self._index 2853 with_ = with_ or self._match_text_seq("WITH") 2854 2855 if not self._match(TokenType.SERDE_PROPERTIES): 2856 self._retreat(index) 2857 return None 2858 return self.expression( 2859 exp.SerdeProperties, 2860 **{ # type: ignore 2861 "expressions": self._parse_wrapped_properties(), 2862 "with": with_, 2863 }, 2864 ) 2865 2866 def _parse_row_format( 2867 self, match_row: bool = False 2868 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2869 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2870 return None 2871 2872 if self._match_text_seq("SERDE"): 2873 this = self._parse_string() 2874 2875 serde_properties = self._parse_serde_properties() 2876 2877 return self.expression( 2878 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2879 ) 2880 2881 self._match_text_seq("DELIMITED") 2882 2883 kwargs = {} 2884 2885 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2886 kwargs["fields"] = self._parse_string() 2887 if self._match_text_seq("ESCAPED", "BY"): 2888 kwargs["escaped"] = self._parse_string() 2889 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2890 kwargs["collection_items"] = self._parse_string() 2891 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2892 kwargs["map_keys"] = self._parse_string() 2893 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2894 kwargs["lines"] = self._parse_string() 2895 if self._match_text_seq("NULL", "DEFINED", "AS"): 2896 kwargs["null"] = self._parse_string() 2897 2898 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2899 2900 def _parse_load(self) -> exp.LoadData | exp.Command: 2901 if self._match_text_seq("DATA"): 2902 local = self._match_text_seq("LOCAL") 2903 self._match_text_seq("INPATH") 2904 inpath = self._parse_string() 2905 overwrite = self._match(TokenType.OVERWRITE) 2906 self._match_pair(TokenType.INTO, TokenType.TABLE) 2907 2908 return self.expression( 2909 exp.LoadData, 2910 this=self._parse_table(schema=True), 2911 local=local, 2912 overwrite=overwrite, 2913 inpath=inpath, 2914 partition=self._parse_partition(), 2915 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2916 serde=self._match_text_seq("SERDE") and self._parse_string(), 2917 ) 2918 return self._parse_as_command(self._prev) 2919 2920 def _parse_delete(self) -> exp.Delete: 2921 # This handles MySQL's "Multiple-Table Syntax" 2922 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2923 tables 
= None 2924 if not self._match(TokenType.FROM, advance=False): 2925 tables = self._parse_csv(self._parse_table) or None 2926 2927 returning = self._parse_returning() 2928 2929 return self.expression( 2930 exp.Delete, 2931 tables=tables, 2932 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2933 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2934 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2935 where=self._parse_where(), 2936 returning=returning or self._parse_returning(), 2937 limit=self._parse_limit(), 2938 ) 2939 2940 def _parse_update(self) -> exp.Update: 2941 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2942 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2943 returning = self._parse_returning() 2944 return self.expression( 2945 exp.Update, 2946 **{ # type: ignore 2947 "this": this, 2948 "expressions": expressions, 2949 "from": self._parse_from(joins=True), 2950 "where": self._parse_where(), 2951 "returning": returning or self._parse_returning(), 2952 "order": self._parse_order(), 2953 "limit": self._parse_limit(), 2954 }, 2955 ) 2956 2957 def _parse_uncache(self) -> exp.Uncache: 2958 if not self._match(TokenType.TABLE): 2959 self.raise_error("Expecting TABLE after UNCACHE") 2960 2961 return self.expression( 2962 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2963 ) 2964 2965 def _parse_cache(self) -> exp.Cache: 2966 lazy = self._match_text_seq("LAZY") 2967 self._match(TokenType.TABLE) 2968 table = self._parse_table(schema=True) 2969 2970 options = [] 2971 if self._match_text_seq("OPTIONS"): 2972 self._match_l_paren() 2973 k = self._parse_string() 2974 self._match(TokenType.EQ) 2975 v = self._parse_string() 2976 options = [k, v] 2977 self._match_r_paren() 2978 2979 self._match(TokenType.ALIAS) 2980 return self.expression( 2981 exp.Cache, 2982 this=table, 2983 lazy=lazy, 2984 options=options, 2985 expression=self._parse_select(nested=True), 2986 ) 2987 2988 def _parse_partition(self) -> t.Optional[exp.Partition]: 2989 if not self._match_texts(self.PARTITION_KEYWORDS): 2990 return None 2991 2992 return self.expression( 2993 exp.Partition, 2994 subpartition=self._prev.text.upper() == "SUBPARTITION", 2995 expressions=self._parse_wrapped_csv(self._parse_assignment), 2996 ) 2997 2998 def _parse_value(self) -> t.Optional[exp.Tuple]: 2999 def _parse_value_expression() -> t.Optional[exp.Expression]: 3000 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3001 return exp.var(self._prev.text.upper()) 3002 return self._parse_expression() 3003 3004 if self._match(TokenType.L_PAREN): 3005 expressions = self._parse_csv(_parse_value_expression) 3006 self._match_r_paren() 3007 return self.expression(exp.Tuple, expressions=expressions) 3008 3009 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
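        # For instance, VALUES 1, 2 would surface in the caller roughly as
        # Values(expressions=[Tuple(expressions=[...]), Tuple(expressions=[...])]),
        # since each bare expression gets wrapped in a one-element Tuple below.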
3010 expression = self._parse_expression() 3011 if expression: 3012 return self.expression(exp.Tuple, expressions=[expression]) 3013 return None 3014 3015 def _parse_projections(self) -> t.List[exp.Expression]: 3016 return self._parse_expressions() 3017 3018 def _parse_select( 3019 self, 3020 nested: bool = False, 3021 table: bool = False, 3022 parse_subquery_alias: bool = True, 3023 parse_set_operation: bool = True, 3024 ) -> t.Optional[exp.Expression]: 3025 cte = self._parse_with() 3026 3027 if cte: 3028 this = self._parse_statement() 3029 3030 if not this: 3031 self.raise_error("Failed to parse any statement following CTE") 3032 return cte 3033 3034 if "with" in this.arg_types: 3035 this.set("with", cte) 3036 else: 3037 self.raise_error(f"{this.key} does not support CTE") 3038 this = cte 3039 3040 return this 3041 3042 # duckdb supports leading with FROM x 3043 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3044 3045 if self._match(TokenType.SELECT): 3046 comments = self._prev_comments 3047 3048 hint = self._parse_hint() 3049 3050 if self._next and not self._next.token_type == TokenType.DOT: 3051 all_ = self._match(TokenType.ALL) 3052 distinct = self._match_set(self.DISTINCT_TOKENS) 3053 else: 3054 all_, distinct = None, None 3055 3056 kind = ( 3057 self._match(TokenType.ALIAS) 3058 and self._match_texts(("STRUCT", "VALUE")) 3059 and self._prev.text.upper() 3060 ) 3061 3062 if distinct: 3063 distinct = self.expression( 3064 exp.Distinct, 3065 on=self._parse_value() if self._match(TokenType.ON) else None, 3066 ) 3067 3068 if all_ and distinct: 3069 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3070 3071 operation_modifiers = [] 3072 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3073 operation_modifiers.append(exp.var(self._prev.text.upper())) 3074 3075 limit = self._parse_limit(top=True) 3076 projections = self._parse_projections() 3077 3078 this = self.expression( 3079 exp.Select, 3080 kind=kind, 3081 hint=hint, 3082 distinct=distinct, 3083 expressions=projections, 3084 limit=limit, 3085 operation_modifiers=operation_modifiers or None, 3086 ) 3087 this.comments = comments 3088 3089 into = self._parse_into() 3090 if into: 3091 this.set("into", into) 3092 3093 if not from_: 3094 from_ = self._parse_from() 3095 3096 if from_: 3097 this.set("from", from_) 3098 3099 this = self._parse_query_modifiers(this) 3100 elif (table or nested) and self._match(TokenType.L_PAREN): 3101 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3102 this = self._parse_simplified_pivot( 3103 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3104 ) 3105 elif self._match(TokenType.FROM): 3106 from_ = self._parse_from(skip_from_token=True) 3107 # Support parentheses for duckdb FROM-first syntax 3108 select = self._parse_select() 3109 if select: 3110 select.set("from", from_) 3111 this = select 3112 else: 3113 this = exp.select("*").from_(t.cast(exp.From, from_)) 3114 else: 3115 this = ( 3116 self._parse_table() 3117 if table 3118 else self._parse_select(nested=True, parse_set_operation=False) 3119 ) 3120 3121 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3122 # in case a modifier (e.g. 
join) is following 3123 if table and isinstance(this, exp.Values) and this.alias: 3124 alias = this.args["alias"].pop() 3125 this = exp.Table(this=this, alias=alias) 3126 3127 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3128 3129 self._match_r_paren() 3130 3131 # We return early here so that the UNION isn't attached to the subquery by the 3132 # following call to _parse_set_operations, but instead becomes the parent node 3133 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3134 elif self._match(TokenType.VALUES, advance=False): 3135 this = self._parse_derived_table_values() 3136 elif from_: 3137 this = exp.select("*").from_(from_.this, copy=False) 3138 elif self._match(TokenType.SUMMARIZE): 3139 table = self._match(TokenType.TABLE) 3140 this = self._parse_select() or self._parse_string() or self._parse_table() 3141 return self.expression(exp.Summarize, this=this, table=table) 3142 elif self._match(TokenType.DESCRIBE): 3143 this = self._parse_describe() 3144 elif self._match_text_seq("STREAM"): 3145 this = self._parse_function() 3146 if this: 3147 this = self.expression(exp.Stream, this=this) 3148 else: 3149 self._retreat(self._index - 1) 3150 else: 3151 this = None 3152 3153 return self._parse_set_operations(this) if parse_set_operation else this 3154 3155 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3156 if not skip_with_token and not self._match(TokenType.WITH): 3157 return None 3158 3159 comments = self._prev_comments 3160 recursive = self._match(TokenType.RECURSIVE) 3161 3162 last_comments = None 3163 expressions = [] 3164 while True: 3165 expressions.append(self._parse_cte()) 3166 if last_comments: 3167 expressions[-1].add_comments(last_comments) 3168 3169 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3170 break 3171 else: 3172 self._match(TokenType.WITH) 3173 3174 last_comments = self._prev_comments 3175 3176 return self.expression( 3177 exp.With, comments=comments, expressions=expressions, recursive=recursive 3178 ) 3179 3180 def _parse_cte(self) -> t.Optional[exp.CTE]: 3181 index = self._index 3182 3183 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3184 if not alias or not alias.this: 3185 self.raise_error("Expected CTE to have alias") 3186 3187 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3188 self._retreat(index) 3189 return None 3190 3191 comments = self._prev_comments 3192 3193 if self._match_text_seq("NOT", "MATERIALIZED"): 3194 materialized = False 3195 elif self._match_text_seq("MATERIALIZED"): 3196 materialized = True 3197 else: 3198 materialized = None 3199 3200 return self.expression( 3201 exp.CTE, 3202 this=self._parse_wrapped(self._parse_statement), 3203 alias=alias, 3204 materialized=materialized, 3205 comments=comments, 3206 ) 3207 3208 def _parse_table_alias( 3209 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3210 ) -> t.Optional[exp.TableAlias]: 3211 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3212 # so this section tries to parse the clause version and if it fails, it treats the token 3213 # as an identifier (alias) 3214 if self._can_parse_limit_or_offset(): 3215 return None 3216 3217 any_token = self._match(TokenType.ALIAS) 3218 alias = ( 3219 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3220 or self._parse_string_as_identifier() 3221 ) 3222 3223 index = self._index 3224 if self._match(TokenType.L_PAREN): 3225 columns = 
self._parse_csv(self._parse_function_parameter) 3226 self._match_r_paren() if columns else self._retreat(index) 3227 else: 3228 columns = None 3229 3230 if not alias and not columns: 3231 return None 3232 3233 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3234 3235 # We bubble up comments from the Identifier to the TableAlias 3236 if isinstance(alias, exp.Identifier): 3237 table_alias.add_comments(alias.pop_comments()) 3238 3239 return table_alias 3240 3241 def _parse_subquery( 3242 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3243 ) -> t.Optional[exp.Subquery]: 3244 if not this: 3245 return None 3246 3247 return self.expression( 3248 exp.Subquery, 3249 this=this, 3250 pivots=self._parse_pivots(), 3251 alias=self._parse_table_alias() if parse_alias else None, 3252 sample=self._parse_table_sample(), 3253 ) 3254 3255 def _implicit_unnests_to_explicit(self, this: E) -> E: 3256 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3257 3258 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3259 for i, join in enumerate(this.args.get("joins") or []): 3260 table = join.this 3261 normalized_table = table.copy() 3262 normalized_table.meta["maybe_column"] = True 3263 normalized_table = _norm(normalized_table, dialect=self.dialect) 3264 3265 if isinstance(table, exp.Table) and not join.args.get("on"): 3266 if normalized_table.parts[0].name in refs: 3267 table_as_column = table.to_column() 3268 unnest = exp.Unnest(expressions=[table_as_column]) 3269 3270 # Table.to_column creates a parent Alias node that we want to convert to 3271 # a TableAlias and attach to the Unnest, so it matches the parser's output 3272 if isinstance(table.args.get("alias"), exp.TableAlias): 3273 table_as_column.replace(table_as_column.this) 3274 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3275 3276 table.replace(unnest) 3277 3278 refs.add(normalized_table.alias_or_name) 3279 3280 return this 3281 3282 def _parse_query_modifiers( 3283 self, this: t.Optional[exp.Expression] 3284 ) -> t.Optional[exp.Expression]: 3285 if isinstance(this, (exp.Query, exp.Table)): 3286 for join in self._parse_joins(): 3287 this.append("joins", join) 3288 for lateral in iter(self._parse_lateral, None): 3289 this.append("laterals", lateral) 3290 3291 while True: 3292 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3293 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3294 key, expression = parser(self) 3295 3296 if expression: 3297 this.set(key, expression) 3298 if key == "limit": 3299 offset = expression.args.pop("offset", None) 3300 3301 if offset: 3302 offset = exp.Offset(expression=offset) 3303 this.set("offset", offset) 3304 3305 limit_by_expressions = expression.expressions 3306 expression.set("expressions", None) 3307 offset.set("expressions", limit_by_expressions) 3308 continue 3309 break 3310 3311 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3312 this = self._implicit_unnests_to_explicit(this) 3313 3314 return this 3315 3316 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3317 start = self._curr 3318 while self._curr: 3319 self._advance() 3320 3321 end = self._tokens[self._index - 1] 3322 return exp.Hint(expressions=[self._find_sql(start, end)]) 3323 3324 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3325 return self._parse_function_call() 3326 3327 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3328 start_index = 
self._index 3329 should_fallback_to_string = False 3330 3331 hints = [] 3332 try: 3333 for hint in iter( 3334 lambda: self._parse_csv( 3335 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3336 ), 3337 [], 3338 ): 3339 hints.extend(hint) 3340 except ParseError: 3341 should_fallback_to_string = True 3342 3343 if should_fallback_to_string or self._curr: 3344 self._retreat(start_index) 3345 return self._parse_hint_fallback_to_string() 3346 3347 return self.expression(exp.Hint, expressions=hints) 3348 3349 def _parse_hint(self) -> t.Optional[exp.Hint]: 3350 if self._match(TokenType.HINT) and self._prev_comments: 3351 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3352 3353 return None 3354 3355 def _parse_into(self) -> t.Optional[exp.Into]: 3356 if not self._match(TokenType.INTO): 3357 return None 3358 3359 temp = self._match(TokenType.TEMPORARY) 3360 unlogged = self._match_text_seq("UNLOGGED") 3361 self._match(TokenType.TABLE) 3362 3363 return self.expression( 3364 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3365 ) 3366 3367 def _parse_from( 3368 self, joins: bool = False, skip_from_token: bool = False 3369 ) -> t.Optional[exp.From]: 3370 if not skip_from_token and not self._match(TokenType.FROM): 3371 return None 3372 3373 return self.expression( 3374 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3375 ) 3376 3377 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3378 return self.expression( 3379 exp.MatchRecognizeMeasure, 3380 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3381 this=self._parse_expression(), 3382 ) 3383 3384 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3385 if not self._match(TokenType.MATCH_RECOGNIZE): 3386 return None 3387 3388 self._match_l_paren() 3389 3390 partition = self._parse_partition_by() 3391 order = self._parse_order() 3392 3393 measures = ( 3394 self._parse_csv(self._parse_match_recognize_measure) 3395 if self._match_text_seq("MEASURES") 3396 else None 3397 ) 3398 3399 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3400 rows = exp.var("ONE ROW PER MATCH") 3401 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3402 text = "ALL ROWS PER MATCH" 3403 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3404 text += " SHOW EMPTY MATCHES" 3405 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3406 text += " OMIT EMPTY MATCHES" 3407 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3408 text += " WITH UNMATCHED ROWS" 3409 rows = exp.var(text) 3410 else: 3411 rows = None 3412 3413 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3414 text = "AFTER MATCH SKIP" 3415 if self._match_text_seq("PAST", "LAST", "ROW"): 3416 text += " PAST LAST ROW" 3417 elif self._match_text_seq("TO", "NEXT", "ROW"): 3418 text += " TO NEXT ROW" 3419 elif self._match_text_seq("TO", "FIRST"): 3420 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3421 elif self._match_text_seq("TO", "LAST"): 3422 text += f" TO LAST {self._advance_any().text}" # type: ignore 3423 after = exp.var(text) 3424 else: 3425 after = None 3426 3427 if self._match_text_seq("PATTERN"): 3428 self._match_l_paren() 3429 3430 if not self._curr: 3431 self.raise_error("Expecting )", self._curr) 3432 3433 paren = 1 3434 start = self._curr 3435 3436 while self._curr and paren > 0: 3437 if self._curr.token_type == TokenType.L_PAREN: 3438 paren += 1 3439 if self._curr.token_type == 
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
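    # Illustrative usage (not part of the original source): both LATERAL subqueries and
    # T-SQL's CROSS/OUTER APPLY are represented with exp.Lateral; the `cross_apply` arg
    # distinguishes them. A minimal sketch, assuming the T-SQL dialect reader:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("SELECT * FROM t CROSS APPLY (SELECT 1) AS s", read="tsql")
    #   >>> ast.find(exp.Lateral) is not None
    #   True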
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
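    # Illustrative usage (not part of the original source): the method/side/kind tokens
    # matched by `_parse_join_parts` become plain-text args on the exp.Join node:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").find(exp.Join)
    #   >>> join.args.get("side"), join.args.get("on") is not None
    #   ('LEFT', True)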
self._match_text_seq("PRIMARY") 3663 amp = self._match_text_seq("AMP") 3664 3665 if not self._match(TokenType.INDEX): 3666 return None 3667 3668 index = self._parse_id_var() 3669 table = None 3670 3671 params = self._parse_index_params() 3672 3673 return self.expression( 3674 exp.Index, 3675 this=index, 3676 table=table, 3677 unique=unique, 3678 primary=primary, 3679 amp=amp, 3680 params=params, 3681 ) 3682 3683 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3684 hints: t.List[exp.Expression] = [] 3685 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3686 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3687 hints.append( 3688 self.expression( 3689 exp.WithTableHint, 3690 expressions=self._parse_csv( 3691 lambda: self._parse_function() or self._parse_var(any_token=True) 3692 ), 3693 ) 3694 ) 3695 self._match_r_paren() 3696 else: 3697 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3698 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3699 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3700 3701 self._match_set((TokenType.INDEX, TokenType.KEY)) 3702 if self._match(TokenType.FOR): 3703 hint.set("target", self._advance_any() and self._prev.text.upper()) 3704 3705 hint.set("expressions", self._parse_wrapped_id_vars()) 3706 hints.append(hint) 3707 3708 return hints or None 3709 3710 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3711 return ( 3712 (not schema and self._parse_function(optional_parens=False)) 3713 or self._parse_id_var(any_token=False) 3714 or self._parse_string_as_identifier() 3715 or self._parse_placeholder() 3716 ) 3717 3718 def _parse_table_parts( 3719 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3720 ) -> exp.Table: 3721 catalog = None 3722 db = None 3723 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3724 3725 while self._match(TokenType.DOT): 3726 if catalog: 3727 # This allows nesting the table in arbitrarily many dot expressions if needed 3728 table = self.expression( 3729 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3730 ) 3731 else: 3732 catalog = db 3733 db = table 3734 # "" used for tsql FROM a..b case 3735 table = self._parse_table_part(schema=schema) or "" 3736 3737 if ( 3738 wildcard 3739 and self._is_connected() 3740 and (isinstance(table, exp.Identifier) or not table) 3741 and self._match(TokenType.STAR) 3742 ): 3743 if isinstance(table, exp.Identifier): 3744 table.args["this"] += "*" 3745 else: 3746 table = exp.Identifier(this="*") 3747 3748 # We bubble up comments from the Identifier to the Table 3749 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3750 3751 if is_db_reference: 3752 catalog = db 3753 db = table 3754 table = None 3755 3756 if not table and not is_db_reference: 3757 self.raise_error(f"Expected table name but got {self._curr}") 3758 if not db and is_db_reference: 3759 self.raise_error(f"Expected database name but got {self._curr}") 3760 3761 table = self.expression( 3762 exp.Table, 3763 comments=comments, 3764 this=table, 3765 db=db, 3766 catalog=catalog, 3767 ) 3768 3769 changes = self._parse_changes() 3770 if changes: 3771 table.set("changes", changes) 3772 3773 at_before = self._parse_historical_data() 3774 if at_before: 3775 table.set("when", at_before) 3776 3777 pivots = self._parse_pivots() 3778 if pivots: 3779 table.set("pivots", pivots) 3780 3781 return table 3782 3783 def 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
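    # Illustrative usage (not part of the original source): a derived VALUES table in the
    # FROM clause is parsed into an exp.Values node with its alias attached:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> values = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS v(x)").find(exp.Values)
    #   >>> values is not None
    #   True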
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)
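    # Illustrative usage (not part of the original source): a Snowflake-style PIVOT clause
    # becomes an exp.Pivot attached to the table, with the aggregation list in
    # `expressions` and the FOR ... IN (...) part in `field`:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))"
    #   >>> pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
    #   >>> pivot.args.get("unpivot")
    #   False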
pivot.set("columns", columns) 4199 4200 return pivot 4201 4202 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4203 return [agg.alias for agg in aggregations] 4204 4205 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4206 if not skip_where_token and not self._match(TokenType.PREWHERE): 4207 return None 4208 4209 return self.expression( 4210 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4211 ) 4212 4213 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4214 if not skip_where_token and not self._match(TokenType.WHERE): 4215 return None 4216 4217 return self.expression( 4218 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4219 ) 4220 4221 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4222 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4223 return None 4224 4225 elements: t.Dict[str, t.Any] = defaultdict(list) 4226 4227 if self._match(TokenType.ALL): 4228 elements["all"] = True 4229 elif self._match(TokenType.DISTINCT): 4230 elements["all"] = False 4231 4232 while True: 4233 index = self._index 4234 4235 elements["expressions"].extend( 4236 self._parse_csv( 4237 lambda: None 4238 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4239 else self._parse_assignment() 4240 ) 4241 ) 4242 4243 before_with_index = self._index 4244 with_prefix = self._match(TokenType.WITH) 4245 4246 if self._match(TokenType.ROLLUP): 4247 elements["rollup"].append( 4248 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4249 ) 4250 elif self._match(TokenType.CUBE): 4251 elements["cube"].append( 4252 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4253 ) 4254 elif self._match(TokenType.GROUPING_SETS): 4255 elements["grouping_sets"].append( 4256 self.expression( 4257 exp.GroupingSets, 4258 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4259 ) 4260 ) 4261 elif self._match_text_seq("TOTALS"): 4262 elements["totals"] = True # type: ignore 4263 4264 if before_with_index <= self._index <= before_with_index + 1: 4265 self._retreat(before_with_index) 4266 break 4267 4268 if index == self._index: 4269 break 4270 4271 return self.expression(exp.Group, **elements) # type: ignore 4272 4273 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4274 return self.expression( 4275 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4276 ) 4277 4278 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4279 if self._match(TokenType.L_PAREN): 4280 grouping_set = self._parse_csv(self._parse_column) 4281 self._match_r_paren() 4282 return self.expression(exp.Tuple, expressions=grouping_set) 4283 4284 return self._parse_column() 4285 4286 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4287 if not skip_having_token and not self._match(TokenType.HAVING): 4288 return None 4289 return self.expression(exp.Having, this=self._parse_assignment()) 4290 4291 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4292 if not self._match(TokenType.QUALIFY): 4293 return None 4294 return self.expression(exp.Qualify, this=self._parse_assignment()) 4295 4296 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4297 if skip_start_token: 4298 start = None 4299 elif self._match(TokenType.START_WITH): 4300 start = self._parse_assignment() 4301 else: 4302 
    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
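    # Illustrative usage (not part of the original source): direction and null ordering
    # are normalized onto the exp.Ordered node, so explicit NULLS FIRST is preserved and
    # dialect defaults are applied otherwise:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ordered = sqlglot.parse_one("SELECT x FROM t ORDER BY x DESC NULLS FIRST").find(exp.Ordered)
    #   >>> ordered.args["desc"], ordered.args["nulls_first"]
    #   (True, True)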
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
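    # Illustrative note (not part of the original source): the chain
    # _parse_assignment -> _parse_disjunction -> _parse_conjunction -> _parse_equality ->
    # _parse_comparison -> _parse_range -> ... -> _parse_term -> _parse_factor is a
    # classic recursive-descent precedence ladder, so tighter operators bind first:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT 1 + 2 * 3").expressions[0]
    #   Add(this=Literal(this=1, is_string=False),
    #       expression=Mul(this=Literal(this=2, is_string=False),
    #                      expression=Literal(this=3, is_string=False)))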
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
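    # Illustrative usage (not part of the original source): range predicates come out as
    # dedicated nodes, with negation wrapping the inner expression:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> where = sqlglot.parse_one("SELECT * FROM t WHERE x IS NOT NULL").args["where"]
    #   >>> isinstance(where.this, exp.Not) and isinstance(where.this.this, exp.Is)
    #   True
    #   >>> between = sqlglot.parse_one("SELECT * FROM t WHERE y BETWEEN 1 AND 2").find(exp.Between)
    #   >>> between.args["low"].sql(), between.args["high"].sql()
    #   ('1', '2')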
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this
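    # Illustrative usage (not part of the original source): per the canonicalization
    # comment above, a combined value/unit string is split into the INTERVAL '<n>' <UNIT>
    # form when round-tripped:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()
    #   "SELECT INTERVAL '5' DAY"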
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type,
                # e.g. in DuckDB ARRAY[1] should retreat and instead be parsed into exp.Array, in contrast to
                # INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
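    # Illustrative usage (not part of the original source): type parameters like a
    # DECIMAL precision/scale are stored as DataTypeParam expressions on the DataType:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> dt = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0))").find(exp.DataType)
    #   >>> dt.this == exp.DataType.Type.DECIMAL, len(dt.expressions)
    #   (True, 2)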
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
col:"a'b") as 5223 # it'll roundtrip to a string literal in GET_PATH 5224 if isinstance(path, exp.Identifier) and path.quoted: 5225 escape = True 5226 5227 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5228 5229 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5230 # Databricks transforms it back to the colon/dot notation 5231 if json_path: 5232 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5233 5234 if json_path_expr: 5235 json_path_expr.set("escape", escape) 5236 5237 this = self.expression( 5238 exp.JSONExtract, 5239 this=this, 5240 expression=json_path_expr, 5241 variant_extract=True, 5242 ) 5243 5244 while casts: 5245 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5246 5247 return this 5248 5249 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5250 return self._parse_types() 5251 5252 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5253 this = self._parse_bracket(this) 5254 5255 while self._match_set(self.COLUMN_OPERATORS): 5256 op_token = self._prev.token_type 5257 op = self.COLUMN_OPERATORS.get(op_token) 5258 5259 if op_token == TokenType.DCOLON: 5260 field = self._parse_dcolon() 5261 if not field: 5262 self.raise_error("Expected type") 5263 elif op and self._curr: 5264 field = self._parse_column_reference() or self._parse_bracket() 5265 else: 5266 field = self._parse_field(any_token=True, anonymous_func=True) 5267 5268 if isinstance(field, (exp.Func, exp.Window)) and this: 5269 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5270 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5271 this = exp.replace_tree( 5272 this, 5273 lambda n: ( 5274 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5275 if n.table 5276 else n.this 5277 ) 5278 if isinstance(n, exp.Column) 5279 else n, 5280 ) 5281 5282 if op: 5283 this = op(self, this, field) 5284 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5285 this = self.expression( 5286 exp.Column, 5287 comments=this.comments, 5288 this=field, 5289 table=this.this, 5290 db=this.args.get("table"), 5291 catalog=this.args.get("db"), 5292 ) 5293 elif isinstance(field, exp.Window): 5294 # Move the exp.Dot's to the window's function 5295 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5296 field.set("this", window_func) 5297 this = field 5298 else: 5299 this = self.expression(exp.Dot, this=this, expression=field) 5300 5301 if field and field.comments: 5302 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5303 5304 this = self._parse_bracket(this) 5305 5306 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5307 5308 def _parse_primary(self) -> t.Optional[exp.Expression]: 5309 if self._match_set(self.PRIMARY_PARSERS): 5310 token_type = self._prev.token_type 5311 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5312 5313 if token_type == TokenType.STRING: 5314 expressions = [primary] 5315 while self._match(TokenType.STRING): 5316 expressions.append(exp.Literal.string(self._prev.text)) 5317 5318 if len(expressions) > 1: 5319 return self.expression(exp.Concat, expressions=expressions) 5320 5321 return primary 5322 5323 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5324 return exp.Literal.number(f"0.{self._prev.text}") 5325 5326 if 
self._match(TokenType.L_PAREN): 5327 comments = self._prev_comments 5328 query = self._parse_select() 5329 5330 if query: 5331 expressions = [query] 5332 else: 5333 expressions = self._parse_expressions() 5334 5335 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5336 5337 if not this and self._match(TokenType.R_PAREN, advance=False): 5338 this = self.expression(exp.Tuple) 5339 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5340 this = self._parse_subquery(this=this, parse_alias=False) 5341 elif isinstance(this, exp.Subquery): 5342 this = self._parse_subquery( 5343 this=self._parse_set_operations(this), parse_alias=False 5344 ) 5345 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5346 this = self.expression(exp.Tuple, expressions=expressions) 5347 else: 5348 this = self.expression(exp.Paren, this=this) 5349 5350 if this: 5351 this.add_comments(comments) 5352 5353 self._match_r_paren(expression=this) 5354 return this 5355 5356 return None 5357 5358 def _parse_field( 5359 self, 5360 any_token: bool = False, 5361 tokens: t.Optional[t.Collection[TokenType]] = None, 5362 anonymous_func: bool = False, 5363 ) -> t.Optional[exp.Expression]: 5364 if anonymous_func: 5365 field = ( 5366 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5367 or self._parse_primary() 5368 ) 5369 else: 5370 field = self._parse_primary() or self._parse_function( 5371 anonymous=anonymous_func, any_token=any_token 5372 ) 5373 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5374 5375 def _parse_function( 5376 self, 5377 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5378 anonymous: bool = False, 5379 optional_parens: bool = True, 5380 any_token: bool = False, 5381 ) -> t.Optional[exp.Expression]: 5382 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5383 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5384 fn_syntax = False 5385 if ( 5386 self._match(TokenType.L_BRACE, advance=False) 5387 and self._next 5388 and self._next.text.upper() == "FN" 5389 ): 5390 self._advance(2) 5391 fn_syntax = True 5392 5393 func = self._parse_function_call( 5394 functions=functions, 5395 anonymous=anonymous, 5396 optional_parens=optional_parens, 5397 any_token=any_token, 5398 ) 5399 5400 if fn_syntax: 5401 self._match(TokenType.R_BRACE) 5402 5403 return func 5404 5405 def _parse_function_call( 5406 self, 5407 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5408 anonymous: bool = False, 5409 optional_parens: bool = True, 5410 any_token: bool = False, 5411 ) -> t.Optional[exp.Expression]: 5412 if not self._curr: 5413 return None 5414 5415 comments = self._curr.comments 5416 token_type = self._curr.token_type 5417 this = self._curr.text 5418 upper = this.upper() 5419 5420 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5421 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5422 self._advance() 5423 return self._parse_window(parser(self)) 5424 5425 if not self._next or self._next.token_type != TokenType.L_PAREN: 5426 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5427 self._advance() 5428 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5429 5430 return None 5431 5432 if any_token: 5433 if token_type in self.RESERVED_TOKENS: 5434 return None 5435 elif token_type not in self.FUNC_TOKENS: 5436 return None 5437 5438 self._advance(2) 5439 5440 parser = self.FUNCTION_PARSERS.get(upper) 5441 if parser and not anonymous: 5442 this = parser(self) 
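                # Illustrative aside, not part of the original source: the FUNCTION_PARSERS
                # dispatch above handles functions with bespoke syntax (e.g. EXTRACT below),
                # while unknown names fall through to the else-branch and become
                # exp.Anonymous. Through the public API (a sketch, reprs approximate):
                #
                #   >>> import sqlglot
                #   >>> from sqlglot import exp
                #   >>> sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").find(exp.Extract)  # FUNCTION_PARSERS hit
                #   >>> sqlglot.parse_one("SELECT my_udf(1, 2)").find(exp.Anonymous)        # unknown -> Anonymous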
5443 else: 5444 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5445 5446 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5447 this = self.expression( 5448 subquery_predicate, comments=comments, this=self._parse_select() 5449 ) 5450 self._match_r_paren() 5451 return this 5452 5453 if functions is None: 5454 functions = self.FUNCTIONS 5455 5456 function = functions.get(upper) 5457 known_function = function and not anonymous 5458 5459 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5460 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5461 5462 post_func_comments = self._curr and self._curr.comments 5463 if known_function and post_func_comments: 5464 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5465 # call we'll construct it as exp.Anonymous, even if it's "known" 5466 if any( 5467 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5468 for comment in post_func_comments 5469 ): 5470 known_function = False 5471 5472 if alias and known_function: 5473 args = self._kv_to_prop_eq(args) 5474 5475 if known_function: 5476 func_builder = t.cast(t.Callable, function) 5477 5478 if "dialect" in func_builder.__code__.co_varnames: 5479 func = func_builder(args, dialect=self.dialect) 5480 else: 5481 func = func_builder(args) 5482 5483 func = self.validate_expression(func, args) 5484 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5485 func.meta["name"] = this 5486 5487 this = func 5488 else: 5489 if token_type == TokenType.IDENTIFIER: 5490 this = exp.Identifier(this=this, quoted=True) 5491 this = self.expression(exp.Anonymous, this=this, expressions=args) 5492 5493 if isinstance(this, exp.Expression): 5494 this.add_comments(comments) 5495 5496 self._match_r_paren(this) 5497 return self._parse_window(this) 5498 5499 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5500 return expression 5501 5502 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5503 transformed = [] 5504 5505 for index, e in enumerate(expressions): 5506 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5507 if isinstance(e, exp.Alias): 5508 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5509 5510 if not isinstance(e, exp.PropertyEQ): 5511 e = self.expression( 5512 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5513 ) 5514 5515 if isinstance(e.this, exp.Column): 5516 e.this.replace(e.this.this) 5517 else: 5518 e = self._to_prop_eq(e, index) 5519 5520 transformed.append(e) 5521 5522 return transformed 5523 5524 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5525 return self._parse_statement() 5526 5527 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5528 return self._parse_column_def(self._parse_id_var()) 5529 5530 def _parse_user_defined_function( 5531 self, kind: t.Optional[TokenType] = None 5532 ) -> t.Optional[exp.Expression]: 5533 this = self._parse_id_var() 5534 5535 while self._match(TokenType.DOT): 5536 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5537 5538 if not self._match(TokenType.L_PAREN): 5539 return this 5540 5541 expressions = self._parse_csv(self._parse_function_parameter) 5542 self._match_r_paren() 5543 return self.expression( 5544 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5545 ) 5546 5547 def _parse_introducer(self, token: Token) -> exp.Introducer | 
exp.Identifier: 5548 literal = self._parse_primary() 5549 if literal: 5550 return self.expression(exp.Introducer, this=token.text, expression=literal) 5551 5552 return self.expression(exp.Identifier, this=token.text) 5553 5554 def _parse_session_parameter(self) -> exp.SessionParameter: 5555 kind = None 5556 this = self._parse_id_var() or self._parse_primary() 5557 5558 if this and self._match(TokenType.DOT): 5559 kind = this.name 5560 this = self._parse_var() or self._parse_primary() 5561 5562 return self.expression(exp.SessionParameter, this=this, kind=kind) 5563 5564 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5565 return self._parse_id_var() 5566 5567 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5568 index = self._index 5569 5570 if self._match(TokenType.L_PAREN): 5571 expressions = t.cast( 5572 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5573 ) 5574 5575 if not self._match(TokenType.R_PAREN): 5576 self._retreat(index) 5577 else: 5578 expressions = [self._parse_lambda_arg()] 5579 5580 if self._match_set(self.LAMBDAS): 5581 return self.LAMBDAS[self._prev.token_type](self, expressions) 5582 5583 self._retreat(index) 5584 5585 this: t.Optional[exp.Expression] 5586 5587 if self._match(TokenType.DISTINCT): 5588 this = self.expression( 5589 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5590 ) 5591 else: 5592 this = self._parse_select_or_expression(alias=alias) 5593 5594 return self._parse_limit( 5595 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5596 ) 5597 5598 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5599 index = self._index 5600 if not self._match(TokenType.L_PAREN): 5601 return this 5602 5603 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5604 # expr can be of both types 5605 if self._match_set(self.SELECT_START_TOKENS): 5606 self._retreat(index) 5607 return this 5608 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5609 self._match_r_paren() 5610 return self.expression(exp.Schema, this=this, expressions=args) 5611 5612 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5613 return self._parse_column_def(self._parse_field(any_token=True)) 5614 5615 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5616 # column defs are not really columns, they're identifiers 5617 if isinstance(this, exp.Column): 5618 this = this.this 5619 5620 kind = self._parse_types(schema=True) 5621 5622 if self._match_text_seq("FOR", "ORDINALITY"): 5623 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5624 5625 constraints: t.List[exp.Expression] = [] 5626 5627 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5628 ("ALIAS", "MATERIALIZED") 5629 ): 5630 persisted = self._prev.text.upper() == "MATERIALIZED" 5631 constraint_kind = exp.ComputedColumnConstraint( 5632 this=self._parse_assignment(), 5633 persisted=persisted or self._match_text_seq("PERSISTED"), 5634 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5635 ) 5636 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5637 elif ( 5638 kind 5639 and self._match(TokenType.ALIAS, advance=False) 5640 and ( 5641 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5642 or (self._next and self._next.token_type == TokenType.L_PAREN) 5643 ) 5644 ): 5645 self._advance() 5646 constraints.append( 5647 self.expression( 5648 exp.ColumnConstraint, 5649 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5650 ) 5651 ) 5652 5653 while True: 5654 constraint = self._parse_column_constraint() 5655 if not constraint: 5656 break 5657 constraints.append(constraint) 5658 5659 if not kind and not constraints: 5660 return this 5661 5662 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5663 5664 def _parse_auto_increment( 5665 self, 5666 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5667 start = None 5668 increment = None 5669 5670 if self._match(TokenType.L_PAREN, advance=False): 5671 args = self._parse_wrapped_csv(self._parse_bitwise) 5672 start = seq_get(args, 0) 5673 increment = seq_get(args, 1) 5674 elif self._match_text_seq("START"): 5675 start = self._parse_bitwise() 5676 self._match_text_seq("INCREMENT") 5677 increment = self._parse_bitwise() 5678 5679 if start and increment: 5680 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5681 5682 return exp.AutoIncrementColumnConstraint() 5683 5684 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5685 if not self._match_text_seq("REFRESH"): 5686 self._retreat(self._index - 1) 5687 return None 5688 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5689 5690 def _parse_compress(self) -> exp.CompressColumnConstraint: 5691 if self._match(TokenType.L_PAREN, advance=False): 5692 return self.expression( 5693 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5694 ) 5695 5696 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5697 5698 def _parse_generated_as_identity( 5699 self, 5700 ) -> ( 5701 exp.GeneratedAsIdentityColumnConstraint 5702 | exp.ComputedColumnConstraint 5703 
| exp.GeneratedAsRowColumnConstraint 5704 ): 5705 if self._match_text_seq("BY", "DEFAULT"): 5706 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5707 this = self.expression( 5708 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5709 ) 5710 else: 5711 self._match_text_seq("ALWAYS") 5712 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5713 5714 self._match(TokenType.ALIAS) 5715 5716 if self._match_text_seq("ROW"): 5717 start = self._match_text_seq("START") 5718 if not start: 5719 self._match(TokenType.END) 5720 hidden = self._match_text_seq("HIDDEN") 5721 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5722 5723 identity = self._match_text_seq("IDENTITY") 5724 5725 if self._match(TokenType.L_PAREN): 5726 if self._match(TokenType.START_WITH): 5727 this.set("start", self._parse_bitwise()) 5728 if self._match_text_seq("INCREMENT", "BY"): 5729 this.set("increment", self._parse_bitwise()) 5730 if self._match_text_seq("MINVALUE"): 5731 this.set("minvalue", self._parse_bitwise()) 5732 if self._match_text_seq("MAXVALUE"): 5733 this.set("maxvalue", self._parse_bitwise()) 5734 5735 if self._match_text_seq("CYCLE"): 5736 this.set("cycle", True) 5737 elif self._match_text_seq("NO", "CYCLE"): 5738 this.set("cycle", False) 5739 5740 if not identity: 5741 this.set("expression", self._parse_range()) 5742 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5743 args = self._parse_csv(self._parse_bitwise) 5744 this.set("start", seq_get(args, 0)) 5745 this.set("increment", seq_get(args, 1)) 5746 5747 self._match_r_paren() 5748 5749 return this 5750 5751 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5752 self._match_text_seq("LENGTH") 5753 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5754 5755 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5756 if self._match_text_seq("NULL"): 5757 return self.expression(exp.NotNullColumnConstraint) 5758 if self._match_text_seq("CASESPECIFIC"): 5759 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5760 if self._match_text_seq("FOR", "REPLICATION"): 5761 return self.expression(exp.NotForReplicationColumnConstraint) 5762 5763 # Unconsume the `NOT` token 5764 self._retreat(self._index - 1) 5765 return None 5766 5767 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5768 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5769 5770 procedure_option_follows = ( 5771 self._match(TokenType.WITH, advance=False) 5772 and self._next 5773 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5774 ) 5775 5776 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5777 return self.expression( 5778 exp.ColumnConstraint, 5779 this=this, 5780 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5781 ) 5782 5783 return this 5784 5785 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5786 if not self._match(TokenType.CONSTRAINT): 5787 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5788 5789 return self.expression( 5790 exp.Constraint, 5791 this=self._parse_id_var(), 5792 expressions=self._parse_unnamed_constraints(), 5793 ) 5794 5795 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5796 constraints = [] 5797 while True: 5798 constraint = self._parse_unnamed_constraint() or self._parse_function() 5799 if not constraint: 5800 break 5801 
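            # Illustrative aside, not part of the original source: this loop lets a single
            # CONSTRAINT <name> be followed by several unnamed constraint kinds in a row,
            # e.g. "CONSTRAINT c PRIMARY KEY NOT NULL" collects both kinds into the
            # enclosing exp.Constraint's expressions (a sketch; exact nodes vary by dialect).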
constraints.append(constraint) 5802 5803 return constraints 5804 5805 def _parse_unnamed_constraint( 5806 self, constraints: t.Optional[t.Collection[str]] = None 5807 ) -> t.Optional[exp.Expression]: 5808 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5809 constraints or self.CONSTRAINT_PARSERS 5810 ): 5811 return None 5812 5813 constraint = self._prev.text.upper() 5814 if constraint not in self.CONSTRAINT_PARSERS: 5815 self.raise_error(f"No parser found for schema constraint {constraint}.") 5816 5817 return self.CONSTRAINT_PARSERS[constraint](self) 5818 5819 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5820 return self._parse_id_var(any_token=False) 5821 5822 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5823 self._match_text_seq("KEY") 5824 return self.expression( 5825 exp.UniqueColumnConstraint, 5826 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5827 this=self._parse_schema(self._parse_unique_key()), 5828 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5829 on_conflict=self._parse_on_conflict(), 5830 ) 5831 5832 def _parse_key_constraint_options(self) -> t.List[str]: 5833 options = [] 5834 while True: 5835 if not self._curr: 5836 break 5837 5838 if self._match(TokenType.ON): 5839 action = None 5840 on = self._advance_any() and self._prev.text 5841 5842 if self._match_text_seq("NO", "ACTION"): 5843 action = "NO ACTION" 5844 elif self._match_text_seq("CASCADE"): 5845 action = "CASCADE" 5846 elif self._match_text_seq("RESTRICT"): 5847 action = "RESTRICT" 5848 elif self._match_pair(TokenType.SET, TokenType.NULL): 5849 action = "SET NULL" 5850 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5851 action = "SET DEFAULT" 5852 else: 5853 self.raise_error("Invalid key constraint") 5854 5855 options.append(f"ON {on} {action}") 5856 else: 5857 var = self._parse_var_from_options( 5858 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5859 ) 5860 if not var: 5861 break 5862 options.append(var.name) 5863 5864 return options 5865 5866 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5867 if match and not self._match(TokenType.REFERENCES): 5868 return None 5869 5870 expressions = None 5871 this = self._parse_table(schema=True) 5872 options = self._parse_key_constraint_options() 5873 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5874 5875 def _parse_foreign_key(self) -> exp.ForeignKey: 5876 expressions = self._parse_wrapped_id_vars() 5877 reference = self._parse_references() 5878 options = {} 5879 5880 while self._match(TokenType.ON): 5881 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5882 self.raise_error("Expected DELETE or UPDATE") 5883 5884 kind = self._prev.text.lower() 5885 5886 if self._match_text_seq("NO", "ACTION"): 5887 action = "NO ACTION" 5888 elif self._match(TokenType.SET): 5889 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5890 action = "SET " + self._prev.text.upper() 5891 else: 5892 self._advance() 5893 action = self._prev.text.upper() 5894 5895 options[kind] = action 5896 5897 return self.expression( 5898 exp.ForeignKey, 5899 expressions=expressions, 5900 reference=reference, 5901 **options, # type: ignore 5902 ) 5903 5904 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5905 return self._parse_field() 5906 5907 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5908 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5909 
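            # Descriptive note, not in the original source: the match above failed, so we
            # unconsume the token that routed us here and let the caller try the remaining
            # constraint parsers. The syntax this method targets is SQL Server's temporal
            # table clause, e.g. PERIOD FOR SYSTEM_TIME (valid_from, valid_to).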
self._retreat(self._index - 1) 5910 return None 5911 5912 id_vars = self._parse_wrapped_id_vars() 5913 return self.expression( 5914 exp.PeriodForSystemTimeConstraint, 5915 this=seq_get(id_vars, 0), 5916 expression=seq_get(id_vars, 1), 5917 ) 5918 5919 def _parse_primary_key( 5920 self, wrapped_optional: bool = False, in_props: bool = False 5921 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5922 desc = ( 5923 self._match_set((TokenType.ASC, TokenType.DESC)) 5924 and self._prev.token_type == TokenType.DESC 5925 ) 5926 5927 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5928 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5929 5930 expressions = self._parse_wrapped_csv( 5931 self._parse_primary_key_part, optional=wrapped_optional 5932 ) 5933 options = self._parse_key_constraint_options() 5934 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5935 5936 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5937 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5938 5939 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5940 """ 5941 Parses a datetime column in ODBC format. We parse the column into the corresponding 5942 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5943 same as we did for `DATE('yyyy-mm-dd')`. 5944 5945 Reference: 5946 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5947 """ 5948 self._match(TokenType.VAR) 5949 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5950 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5951 if not self._match(TokenType.R_BRACE): 5952 self.raise_error("Expected }") 5953 return expression 5954 5955 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5956 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5957 return this 5958 5959 bracket_kind = self._prev.token_type 5960 if ( 5961 bracket_kind == TokenType.L_BRACE 5962 and self._curr 5963 and self._curr.token_type == TokenType.VAR 5964 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5965 ): 5966 return self._parse_odbc_datetime_literal() 5967 5968 expressions = self._parse_csv( 5969 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5970 ) 5971 5972 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5973 self.raise_error("Expected ]") 5974 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5975 self.raise_error("Expected }") 5976 5977 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5978 if bracket_kind == TokenType.L_BRACE: 5979 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5980 elif not this: 5981 this = build_array_constructor( 5982 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5983 ) 5984 else: 5985 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5986 if constructor_type: 5987 return build_array_constructor( 5988 constructor_type, 5989 args=expressions, 5990 bracket_kind=bracket_kind, 5991 dialect=self.dialect, 5992 ) 5993 5994 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5995 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5996 5997 self._add_comments(this) 5998 return self._parse_bracket(this) 
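    # Illustrative sketch, not part of the original source: _parse_bracket covers both
    # subscripts and literal constructors, so through the public API (reprs approximate):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT x[1]").find(exp.Bracket)                    # subscript
    #   >>> sqlglot.parse_one("SELECT [1, 2]", read="duckdb").find(exp.Array)     # array literal
    #   >>> sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct)  # brace struct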
5999 6000 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6001 if self._match(TokenType.COLON): 6002 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6003 return this 6004 6005 def _parse_case(self) -> t.Optional[exp.Expression]: 6006 ifs = [] 6007 default = None 6008 6009 comments = self._prev_comments 6010 expression = self._parse_assignment() 6011 6012 while self._match(TokenType.WHEN): 6013 this = self._parse_assignment() 6014 self._match(TokenType.THEN) 6015 then = self._parse_assignment() 6016 ifs.append(self.expression(exp.If, this=this, true=then)) 6017 6018 if self._match(TokenType.ELSE): 6019 default = self._parse_assignment() 6020 6021 if not self._match(TokenType.END): 6022 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6023 default = exp.column("interval") 6024 else: 6025 self.raise_error("Expected END after CASE", self._prev) 6026 6027 return self.expression( 6028 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6029 ) 6030 6031 def _parse_if(self) -> t.Optional[exp.Expression]: 6032 if self._match(TokenType.L_PAREN): 6033 args = self._parse_csv(self._parse_assignment) 6034 this = self.validate_expression(exp.If.from_arg_list(args), args) 6035 self._match_r_paren() 6036 else: 6037 index = self._index - 1 6038 6039 if self.NO_PAREN_IF_COMMANDS and index == 0: 6040 return self._parse_as_command(self._prev) 6041 6042 condition = self._parse_assignment() 6043 6044 if not condition: 6045 self._retreat(index) 6046 return None 6047 6048 self._match(TokenType.THEN) 6049 true = self._parse_assignment() 6050 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6051 self._match(TokenType.END) 6052 this = self.expression(exp.If, this=condition, true=true, false=false) 6053 6054 return this 6055 6056 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6057 if not self._match_text_seq("VALUE", "FOR"): 6058 self._retreat(self._index - 1) 6059 return None 6060 6061 return self.expression( 6062 exp.NextValueFor, 6063 this=self._parse_column(), 6064 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6065 ) 6066 6067 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6068 this = self._parse_function() or self._parse_var_or_string(upper=True) 6069 6070 if self._match(TokenType.FROM): 6071 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6072 6073 if not self._match(TokenType.COMMA): 6074 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6075 6076 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6077 6078 def _parse_gap_fill(self) -> exp.GapFill: 6079 self._match(TokenType.TABLE) 6080 this = self._parse_table() 6081 6082 self._match(TokenType.COMMA) 6083 args = [this, *self._parse_csv(self._parse_lambda)] 6084 6085 gap_fill = exp.GapFill.from_arg_list(args) 6086 return self.validate_expression(gap_fill, args) 6087 6088 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6089 this = self._parse_assignment() 6090 6091 if not self._match(TokenType.ALIAS): 6092 if self._match(TokenType.COMMA): 6093 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6094 6095 self.raise_error("Expected AS after CAST") 6096 6097 fmt = None 6098 to = self._parse_types() 6099 6100 if self._match(TokenType.FORMAT): 6101 fmt_string = self._parse_string() 6102 fmt = 
self._parse_at_time_zone(fmt_string) 6103 6104 if not to: 6105 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6106 if to.this in exp.DataType.TEMPORAL_TYPES: 6107 this = self.expression( 6108 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6109 this=this, 6110 format=exp.Literal.string( 6111 format_time( 6112 fmt_string.this if fmt_string else "", 6113 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6114 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6115 ) 6116 ), 6117 safe=safe, 6118 ) 6119 6120 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6121 this.set("zone", fmt.args["zone"]) 6122 return this 6123 elif not to: 6124 self.raise_error("Expected TYPE after CAST") 6125 elif isinstance(to, exp.Identifier): 6126 to = exp.DataType.build(to.name, udt=True) 6127 elif to.this == exp.DataType.Type.CHAR: 6128 if self._match(TokenType.CHARACTER_SET): 6129 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6130 6131 return self.expression( 6132 exp.Cast if strict else exp.TryCast, 6133 this=this, 6134 to=to, 6135 format=fmt, 6136 safe=safe, 6137 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6138 ) 6139 6140 def _parse_string_agg(self) -> exp.GroupConcat: 6141 if self._match(TokenType.DISTINCT): 6142 args: t.List[t.Optional[exp.Expression]] = [ 6143 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6144 ] 6145 if self._match(TokenType.COMMA): 6146 args.extend(self._parse_csv(self._parse_assignment)) 6147 else: 6148 args = self._parse_csv(self._parse_assignment) # type: ignore 6149 6150 if self._match_text_seq("ON", "OVERFLOW"): 6151 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6152 if self._match_text_seq("ERROR"): 6153 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6154 else: 6155 self._match_text_seq("TRUNCATE") 6156 on_overflow = self.expression( 6157 exp.OverflowTruncateBehavior, 6158 this=self._parse_string(), 6159 with_count=( 6160 self._match_text_seq("WITH", "COUNT") 6161 or not self._match_text_seq("WITHOUT", "COUNT") 6162 ), 6163 ) 6164 else: 6165 on_overflow = None 6166 6167 index = self._index 6168 if not self._match(TokenType.R_PAREN) and args: 6169 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6170 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6171 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6172 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6173 6174 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6175 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6176 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
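        # Illustrative sketch, not part of the original source: because WITHIN GROUP is
        # folded into the exp.GroupConcat node here, transpilation to inline-ORDER BY
        # dialects stays simple, e.g. (output is a rough sketch and may vary by version):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile(
        #   ...     "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y) FROM t",
        #   ...     read="tsql", write="postgres",
        #   ... )[0]
        #   "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t"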
6177 if not self._match_text_seq("WITHIN", "GROUP"): 6178 self._retreat(index) 6179 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6180 6181 # The corresponding match_r_paren will be called in parse_function (caller) 6182 self._match_l_paren() 6183 6184 return self.expression( 6185 exp.GroupConcat, 6186 this=self._parse_order(this=seq_get(args, 0)), 6187 separator=seq_get(args, 1), 6188 on_overflow=on_overflow, 6189 ) 6190 6191 def _parse_convert( 6192 self, strict: bool, safe: t.Optional[bool] = None 6193 ) -> t.Optional[exp.Expression]: 6194 this = self._parse_bitwise() 6195 6196 if self._match(TokenType.USING): 6197 to: t.Optional[exp.Expression] = self.expression( 6198 exp.CharacterSet, this=self._parse_var() 6199 ) 6200 elif self._match(TokenType.COMMA): 6201 to = self._parse_types() 6202 else: 6203 to = None 6204 6205 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6206 6207 def _parse_xml_table(self) -> exp.XMLTable: 6208 this = self._parse_string() 6209 6210 passing = None 6211 columns = None 6212 6213 if self._match_text_seq("PASSING"): 6214 # The BY VALUE keywords are optional and are provided for semantic clarity 6215 self._match_text_seq("BY", "VALUE") 6216 passing = self._parse_csv(self._parse_column) 6217 6218 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6219 6220 if self._match_text_seq("COLUMNS"): 6221 columns = self._parse_csv(self._parse_field_def) 6222 6223 return self.expression( 6224 exp.XMLTable, this=this, passing=passing, columns=columns, by_ref=by_ref 6225 ) 6226 6227 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6228 """ 6229 There are generally two variants of the DECODE function: 6230 6231 - DECODE(bin, charset) 6232 - DECODE(expression, search, result [, search, result] ... [, default]) 6233 6234 The second variant will always be parsed into a CASE expression. Note that NULL 6235 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6236 instead of relying on pattern matching. 
6237 """ 6238 args = self._parse_csv(self._parse_assignment) 6239 6240 if len(args) < 3: 6241 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6242 6243 expression, *expressions = args 6244 if not expression: 6245 return None 6246 6247 ifs = [] 6248 for search, result in zip(expressions[::2], expressions[1::2]): 6249 if not search or not result: 6250 return None 6251 6252 if isinstance(search, exp.Literal): 6253 ifs.append( 6254 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6255 ) 6256 elif isinstance(search, exp.Null): 6257 ifs.append( 6258 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6259 ) 6260 else: 6261 cond = exp.or_( 6262 exp.EQ(this=expression.copy(), expression=search), 6263 exp.and_( 6264 exp.Is(this=expression.copy(), expression=exp.Null()), 6265 exp.Is(this=search.copy(), expression=exp.Null()), 6266 copy=False, 6267 ), 6268 copy=False, 6269 ) 6270 ifs.append(exp.If(this=cond, true=result)) 6271 6272 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6273 6274 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6275 self._match_text_seq("KEY") 6276 key = self._parse_column() 6277 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6278 self._match_text_seq("VALUE") 6279 value = self._parse_bitwise() 6280 6281 if not key and not value: 6282 return None 6283 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6284 6285 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6286 if not this or not self._match_text_seq("FORMAT", "JSON"): 6287 return this 6288 6289 return self.expression(exp.FormatJson, this=this) 6290 6291 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6292 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6293 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6294 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6295 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6296 else: 6297 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6298 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6299 6300 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6301 6302 if not empty and not error and not null: 6303 return None 6304 6305 return self.expression( 6306 exp.OnCondition, 6307 empty=empty, 6308 error=error, 6309 null=null, 6310 ) 6311 6312 def _parse_on_handling( 6313 self, on: str, *values: str 6314 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6315 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6316 for value in values: 6317 if self._match_text_seq(value, "ON", on): 6318 return f"{value} ON {on}" 6319 6320 index = self._index 6321 if self._match(TokenType.DEFAULT): 6322 default_value = self._parse_bitwise() 6323 if self._match_text_seq("ON", on): 6324 return default_value 6325 6326 self._retreat(index) 6327 6328 return None 6329 6330 @t.overload 6331 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6332 6333 @t.overload 6334 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6335 6336 def _parse_json_object(self, agg=False): 6337 star = self._parse_star() 6338 expressions = ( 6339 [star] 6340 if star 6341 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6342 ) 6343 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6344 6345 unique_keys = None 6346 if self._match_text_seq("WITH", "UNIQUE"): 6347 unique_keys = True 6348 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6349 unique_keys = False 6350 6351 self._match_text_seq("KEYS") 6352 6353 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6354 self._parse_type() 6355 ) 6356 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6357 6358 return self.expression( 6359 exp.JSONObjectAgg if agg else exp.JSONObject, 6360 expressions=expressions, 6361 null_handling=null_handling, 6362 unique_keys=unique_keys, 6363 return_type=return_type, 6364 encoding=encoding, 6365 ) 6366 6367 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6368 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6369 if not self._match_text_seq("NESTED"): 6370 this = self._parse_id_var() 6371 kind = self._parse_types(allow_identifiers=False) 6372 nested = None 6373 else: 6374 this = None 6375 kind = None 6376 nested = True 6377 6378 path = self._match_text_seq("PATH") and self._parse_string() 6379 nested_schema = nested and self._parse_json_schema() 6380 6381 return self.expression( 6382 exp.JSONColumnDef, 6383 this=this, 6384 kind=kind, 6385 path=path, 6386 nested_schema=nested_schema, 6387 ) 6388 6389 def _parse_json_schema(self) -> exp.JSONSchema: 6390 self._match_text_seq("COLUMNS") 6391 return self.expression( 6392 exp.JSONSchema, 6393 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6394 ) 6395 6396 def _parse_json_table(self) -> exp.JSONTable: 6397 this = self._parse_format_json(self._parse_bitwise()) 6398 path = self._match(TokenType.COMMA) and self._parse_string() 6399 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6400 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6401 schema = self._parse_json_schema() 6402 6403 return exp.JSONTable( 6404 this=this, 6405 schema=schema, 6406 path=path, 6407 error_handling=error_handling, 6408 empty_handling=empty_handling, 6409 ) 6410 6411 def _parse_match_against(self) -> exp.MatchAgainst: 6412 expressions = self._parse_csv(self._parse_column) 6413 6414 self._match_text_seq(")", "AGAINST", "(") 6415 6416 this = self._parse_string() 6417 6418 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6419 modifier = "IN NATURAL LANGUAGE MODE" 6420 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6421 modifier = f"{modifier} WITH QUERY EXPANSION" 6422 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6423 modifier = "IN BOOLEAN MODE" 6424 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6425 modifier = "WITH QUERY EXPANSION" 6426 else: 6427 modifier = None 6428 6429 return self.expression( 6430 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6431 ) 6432 6433 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6434 def _parse_open_json(self) -> exp.OpenJSON: 6435 this = self._parse_bitwise() 6436 path = self._match(TokenType.COMMA) and self._parse_string() 6437 6438 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6439 this = self._parse_field(any_token=True) 6440 kind = self._parse_types() 6441 path = 
self._parse_string() 6442 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6443 6444 return self.expression( 6445 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6446 ) 6447 6448 expressions = None 6449 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6450 self._match_l_paren() 6451 expressions = self._parse_csv(_parse_open_json_column_def) 6452 6453 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6454 6455 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6456 args = self._parse_csv(self._parse_bitwise) 6457 6458 if self._match(TokenType.IN): 6459 return self.expression( 6460 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6461 ) 6462 6463 if haystack_first: 6464 haystack = seq_get(args, 0) 6465 needle = seq_get(args, 1) 6466 else: 6467 needle = seq_get(args, 0) 6468 haystack = seq_get(args, 1) 6469 6470 return self.expression( 6471 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6472 ) 6473 6474 def _parse_predict(self) -> exp.Predict: 6475 self._match_text_seq("MODEL") 6476 this = self._parse_table() 6477 6478 self._match(TokenType.COMMA) 6479 self._match_text_seq("TABLE") 6480 6481 return self.expression( 6482 exp.Predict, 6483 this=this, 6484 expression=self._parse_table(), 6485 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6486 ) 6487 6488 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6489 args = self._parse_csv(self._parse_table) 6490 return exp.JoinHint(this=func_name.upper(), expressions=args) 6491 6492 def _parse_substring(self) -> exp.Substring: 6493 # Postgres supports the form: substring(string [from int] [for int]) 6494 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6495 6496 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6497 6498 if self._match(TokenType.FROM): 6499 args.append(self._parse_bitwise()) 6500 if self._match(TokenType.FOR): 6501 if len(args) == 1: 6502 args.append(exp.Literal.number(1)) 6503 args.append(self._parse_bitwise()) 6504 6505 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6506 6507 def _parse_trim(self) -> exp.Trim: 6508 # https://www.w3resource.com/sql/character-functions/trim.php 6509 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6510 6511 position = None 6512 collation = None 6513 expression = None 6514 6515 if self._match_texts(self.TRIM_TYPES): 6516 position = self._prev.text.upper() 6517 6518 this = self._parse_bitwise() 6519 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6520 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6521 expression = self._parse_bitwise() 6522 6523 if invert_order: 6524 this, expression = expression, this 6525 6526 if self._match(TokenType.COLLATE): 6527 collation = self._parse_bitwise() 6528 6529 return self.expression( 6530 exp.Trim, this=this, position=position, expression=expression, collation=collation 6531 ) 6532 6533 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6534 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6535 6536 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6537 return self._parse_window(self._parse_id_var(), alias=True) 6538 6539 def _parse_respect_or_ignore_nulls( 6540 self, this: t.Optional[exp.Expression] 6541 ) -> t.Optional[exp.Expression]: 6542 if self._match_text_seq("IGNORE", "NULLS"): 
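        # Descriptive note, not in the original source: the wrapper is built around the
        # expression parsed so far, so "x IGNORE NULLS" becomes IgnoreNulls(this=x); the
        # AggFunc handling in _parse_window below then hoists such nodes out of aggregate
        # arguments, normalizing FIRST_VALUE(x IGNORE NULLS) and FIRST_VALUE(x) IGNORE NULLS
        # to the same shape.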
6543 return self.expression(exp.IgnoreNulls, this=this) 6544 if self._match_text_seq("RESPECT", "NULLS"): 6545 return self.expression(exp.RespectNulls, this=this) 6546 return this 6547 6548 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6549 if self._match(TokenType.HAVING): 6550 self._match_texts(("MAX", "MIN")) 6551 max = self._prev.text.upper() != "MIN" 6552 return self.expression( 6553 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6554 ) 6555 6556 return this 6557 6558 def _parse_window( 6559 self, this: t.Optional[exp.Expression], alias: bool = False 6560 ) -> t.Optional[exp.Expression]: 6561 func = this 6562 comments = func.comments if isinstance(func, exp.Expression) else None 6563 6564 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6565 self._match(TokenType.WHERE) 6566 this = self.expression( 6567 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6568 ) 6569 self._match_r_paren() 6570 6571 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6572 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6573 if self._match_text_seq("WITHIN", "GROUP"): 6574 order = self._parse_wrapped(self._parse_order) 6575 this = self.expression(exp.WithinGroup, this=this, expression=order) 6576 6577 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6578 # Some dialects choose to implement and some do not. 6579 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6580 6581 # There is some code above in _parse_lambda that handles 6582 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6583 6584 # The below changes handle 6585 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6586 6587 # Oracle allows both formats 6588 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6589 # and Snowflake chose to do the same for familiarity 6590 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6591 if isinstance(this, exp.AggFunc): 6592 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6593 6594 if ignore_respect and ignore_respect is not this: 6595 ignore_respect.replace(ignore_respect.this) 6596 this = self.expression(ignore_respect.__class__, this=this) 6597 6598 this = self._parse_respect_or_ignore_nulls(this) 6599 6600 # bigquery select from window x AS (partition by ...) 
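            # Illustrative aside, not part of the original source: when alias=True, the
            # branch below parses BigQuery-style named windows, e.g.
            #
            #   SELECT COUNT(*) OVER w FROM t WINDOW w AS (PARTITION BY a ORDER BY b)
            #
            # where "w AS (...)" is parsed via _parse_named_window into an exp.Window that
            # carries an alias instead of an OVER keyword.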
6601 if alias: 6602 over = None 6603 self._match(TokenType.ALIAS) 6604 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6605 return this 6606 else: 6607 over = self._prev.text.upper() 6608 6609 if comments and isinstance(func, exp.Expression): 6610 func.pop_comments() 6611 6612 if not self._match(TokenType.L_PAREN): 6613 return self.expression( 6614 exp.Window, 6615 comments=comments, 6616 this=this, 6617 alias=self._parse_id_var(False), 6618 over=over, 6619 ) 6620 6621 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6622 6623 first = self._match(TokenType.FIRST) 6624 if self._match_text_seq("LAST"): 6625 first = False 6626 6627 partition, order = self._parse_partition_and_order() 6628 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6629 6630 if kind: 6631 self._match(TokenType.BETWEEN) 6632 start = self._parse_window_spec() 6633 self._match(TokenType.AND) 6634 end = self._parse_window_spec() 6635 6636 spec = self.expression( 6637 exp.WindowSpec, 6638 kind=kind, 6639 start=start["value"], 6640 start_side=start["side"], 6641 end=end["value"], 6642 end_side=end["side"], 6643 ) 6644 else: 6645 spec = None 6646 6647 self._match_r_paren() 6648 6649 window = self.expression( 6650 exp.Window, 6651 comments=comments, 6652 this=this, 6653 partition_by=partition, 6654 order=order, 6655 spec=spec, 6656 alias=window_alias, 6657 over=over, 6658 first=first, 6659 ) 6660 6661 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6662 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6663 return self._parse_window(window, alias=alias) 6664 6665 return window 6666 6667 def _parse_partition_and_order( 6668 self, 6669 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6670 return self._parse_partition_by(), self._parse_order() 6671 6672 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6673 self._match(TokenType.BETWEEN) 6674 6675 return { 6676 "value": ( 6677 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6678 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6679 or self._parse_bitwise() 6680 ), 6681 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6682 } 6683 6684 def _parse_alias( 6685 self, this: t.Optional[exp.Expression], explicit: bool = False 6686 ) -> t.Optional[exp.Expression]: 6687 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6688 # so this section tries to parse the clause version and if it fails, it treats the token 6689 # as an identifier (alias) 6690 if self._can_parse_limit_or_offset(): 6691 return this 6692 6693 any_token = self._match(TokenType.ALIAS) 6694 comments = self._prev_comments or [] 6695 6696 if explicit and not any_token: 6697 return this 6698 6699 if self._match(TokenType.L_PAREN): 6700 aliases = self.expression( 6701 exp.Aliases, 6702 comments=comments, 6703 this=this, 6704 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6705 ) 6706 self._match_r_paren(aliases) 6707 return aliases 6708 6709 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6710 self.STRING_ALIASES and self._parse_string_as_identifier() 6711 ) 6712 6713 if alias: 6714 comments.extend(alias.pop_comments()) 6715 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6716 column = this.this 6717 6718 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6719 if not this.comments and column and 
column.comments: 6720 this.comments = column.pop_comments() 6721 6722 return this 6723 6724 def _parse_id_var( 6725 self, 6726 any_token: bool = True, 6727 tokens: t.Optional[t.Collection[TokenType]] = None, 6728 ) -> t.Optional[exp.Expression]: 6729 expression = self._parse_identifier() 6730 if not expression and ( 6731 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6732 ): 6733 quoted = self._prev.token_type == TokenType.STRING 6734 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6735 6736 return expression 6737 6738 def _parse_string(self) -> t.Optional[exp.Expression]: 6739 if self._match_set(self.STRING_PARSERS): 6740 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6741 return self._parse_placeholder() 6742 6743 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6744 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6745 6746 def _parse_number(self) -> t.Optional[exp.Expression]: 6747 if self._match_set(self.NUMERIC_PARSERS): 6748 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6749 return self._parse_placeholder() 6750 6751 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6752 if self._match(TokenType.IDENTIFIER): 6753 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6754 return self._parse_placeholder() 6755 6756 def _parse_var( 6757 self, 6758 any_token: bool = False, 6759 tokens: t.Optional[t.Collection[TokenType]] = None, 6760 upper: bool = False, 6761 ) -> t.Optional[exp.Expression]: 6762 if ( 6763 (any_token and self._advance_any()) 6764 or self._match(TokenType.VAR) 6765 or (self._match_set(tokens) if tokens else False) 6766 ): 6767 return self.expression( 6768 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6769 ) 6770 return self._parse_placeholder() 6771 6772 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6773 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6774 self._advance() 6775 return self._prev 6776 return None 6777 6778 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6779 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6780 6781 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6782 return self._parse_primary() or self._parse_var(any_token=True) 6783 6784 def _parse_null(self) -> t.Optional[exp.Expression]: 6785 if self._match_set(self.NULL_TOKENS): 6786 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6787 return self._parse_placeholder() 6788 6789 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6790 if self._match(TokenType.TRUE): 6791 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6792 if self._match(TokenType.FALSE): 6793 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6794 return self._parse_placeholder() 6795 6796 def _parse_star(self) -> t.Optional[exp.Expression]: 6797 if self._match(TokenType.STAR): 6798 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6799 return self._parse_placeholder() 6800 6801 def _parse_parameter(self) -> exp.Parameter: 6802 this = self._parse_identifier() or self._parse_primary_or_var() 6803 return self.expression(exp.Parameter, this=this) 6804 6805 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6806 if self._match_set(self.PLACEHOLDER_PARSERS): 6807 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6808 if placeholder: 6809 return placeholder 6810 self._advance(-1) 6811 return None 6812 6813 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6814 if not self._match_texts(keywords): 6815 return None 6816 if self._match(TokenType.L_PAREN, advance=False): 6817 return self._parse_wrapped_csv(self._parse_expression) 6818 6819 expression = self._parse_expression() 6820 return [expression] if expression else None 6821 6822 def _parse_csv( 6823 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6824 ) -> t.List[exp.Expression]: 6825 parse_result = parse_method() 6826 items = [parse_result] if parse_result is not None else [] 6827 6828 while self._match(sep): 6829 self._add_comments(parse_result) 6830 parse_result = parse_method() 6831 if parse_result is not None: 6832 items.append(parse_result) 6833 6834 return items 6835 6836 def _parse_tokens( 6837 self, parse_method: t.Callable, expressions: t.Dict 6838 ) -> t.Optional[exp.Expression]: 6839 this = parse_method() 6840 6841 while self._match_set(expressions): 6842 this = self.expression( 6843 expressions[self._prev.token_type], 6844 this=this, 6845 comments=self._prev_comments, 6846 expression=parse_method(), 6847 ) 6848 6849 return this 6850 6851 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6852 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6853 6854 def _parse_wrapped_csv( 6855 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6856 ) -> t.List[exp.Expression]: 6857 return self._parse_wrapped( 6858 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6859 ) 6860 6861 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6862 wrapped = self._match(TokenType.L_PAREN) 6863 if not wrapped and not optional: 6864 self.raise_error("Expecting (") 6865 parse_result = parse_method() 6866 if wrapped: 6867 self._match_r_paren() 6868 return parse_result 6869 6870 def _parse_expressions(self) -> t.List[exp.Expression]: 6871 return self._parse_csv(self._parse_expression) 6872 6873 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6874 return self._parse_select() or self._parse_set_operations( 6875 self._parse_alias(self._parse_assignment(), explicit=True) 6876 if alias 6877 else self._parse_assignment() 6878 ) 6879 6880 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6881 return self._parse_query_modifiers( 6882 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6883 ) 6884 6885 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6886 this = None 6887 if self._match_texts(self.TRANSACTION_KIND): 6888 this = self._prev.text 6889 6890 self._match_texts(("TRANSACTION", "WORK")) 6891 6892 modes = [] 6893 while True: 6894 mode = [] 6895 while self._match(TokenType.VAR): 6896 mode.append(self._prev.text) 6897 6898 if mode: 6899 modes.append(" ".join(mode)) 6900 if not self._match(TokenType.COMMA): 6901 break 6902 6903 return self.expression(exp.Transaction, this=this, modes=modes) 6904 6905 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6906 chain = None 6907 savepoint = None 6908 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6909 6910 self._match_texts(("TRANSACTION", "WORK")) 6911 6912 if self._match_text_seq("TO"): 6913 self._match_text_seq("SAVEPOINT") 6914 savepoint = self._parse_id_var() 
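        # Illustrative aside, not part of the original source: rough sketches of inputs
        # this method accepts and the shapes they produce:
        #
        #   COMMIT WORK AND CHAIN      -> exp.Commit(chain=True)
        #   COMMIT AND NO CHAIN        -> exp.Commit(chain=False)
        #   ROLLBACK TO SAVEPOINT sp1  -> exp.Rollback(savepoint=Identifier(this=sp1))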
    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())
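    # Usage sketch: these methods back BEGIN/COMMIT/ROLLBACK statement
    # parsing, so simple transaction control statements map directly onto
    # expression nodes (illustrative, assuming the public parse_one API):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("COMMIT AND CHAIN")          # exp.Commit(chain=True)
    #     >>> sqlglot.parse_one("ROLLBACK TO SAVEPOINT s1")  # exp.Rollback(savepoint=s1)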
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
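    # Usage sketch (illustrative): parsed ALTER sub-clauses surface as the
    # "actions" of an exp.Alter node; a column type change, for example,
    # becomes an exp.AlterColumn action:
    #
    #     >>> import sqlglot
    #     >>> e = sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET DATA TYPE TEXT")
    #     >>> type(e).__name__, type(e.args["actions"][0]).__name__
    #     ('Alter', 'AlterColumn')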
    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind  https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )
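    # Usage sketch (illustrative; ANALYZE grammars are dialect-specific, so
    # this assumes the Spark dialect routes COMPUTE through
    # _parse_analyze_statistics):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS NOSCAN", read="spark")
    #     # -> exp.Analyze(kind='TABLE', expression=exp.AnalyzeStatistics(this='NOSCAN'))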
    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
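    # Usage sketch (illustrative): MERGE parses into exp.Merge, with the WHEN
    # branches collected under "whens" by _parse_when_matched below:
    #
    #     >>> import sqlglot
    #     >>> e = sqlglot.parse_one(
    #     ...     "MERGE INTO t USING s ON t.id = s.id "
    #     ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #     ...     "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #     ... )
    #     >>> [w.args.get("matched") for w in e.args["whens"].expressions]
    #     [True, False]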
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
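    # Usage sketch (illustrative): each assignment in a SET statement becomes
    # an exp.SetItem wrapping an exp.EQ:
    #
    #     >>> import sqlglot
    #     >>> item = sqlglot.parse_one("SET x = 1").expressions[0]
    #     >>> item.this.sql()
    #     'x = 1'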
    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None
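    # `_find_parser` greedily feeds keyword tokens through a trie that the
    # metaclass builds from the parser tables (e.g. SHOW_TRIE from
    # SHOW_PARSERS). The same mechanism, standalone (illustrative):
    #
    #     >>> from sqlglot.trie import TrieResult, in_trie, new_trie
    #     >>> trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
    #     >>> in_trie(trie, ["SHOW"])[0] == TrieResult.PREFIX
    #     True
    #     >>> in_trie(trie, ["SHOW", "TABLES"])[0] == TrieResult.EXISTS
    #     True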
    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
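    # The `_match*` family implements speculative matching: `_match_text_seq`
    # retreats to its starting index when the full keyword sequence is absent,
    # so callers can probe alternatives without consuming tokens. A
    # hypothetical clause parser combining them (illustration only, not part
    # of sqlglot):
    #
    #     def _parse_my_clause(self):
    #         if not self._match_text_seq("MY", "CLAUSE"):  # no-op on failure
    #             return None
    #         wrapped = self._match(TokenType.L_PAREN)      # optional parens
    #         this = self._parse_id_var()
    #         if wrapped:
    #             self._match_r_paren()                     # raises "Expecting )" if missing
    #         return this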
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
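    # Usage sketch (illustrative): Postgres-style TRUNCATE options surface as
    # plain args on exp.TruncateTable:
    #
    #     >>> import sqlglot
    #     >>> e = sqlglot.parse_one(
    #     ...     "TRUNCATE TABLE a, b RESTART IDENTITY CASCADE", read="postgres"
    #     ... )
    #     >>> e.args["identity"], e.args["option"]
    #     ('RESTART', 'CASCADE')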
    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
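    # Usage sketch (illustrative; COPY syntax is highly dialect-specific, and
    # unsupported variants deliberately degrade to exp.Command via
    # _parse_as_command):
    #
    #     >>> import sqlglot
    #     >>> e = sqlglot.parse_one("COPY t FROM 'data.csv' WITH (FORMAT CSV)", read="postgres")
    #     >>> e.args["kind"]  # True for FROM (load), False for TO (unload)
    #     True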
    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
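    # Usage sketch (illustrative): a fully parseable GRANT produces exp.Grant,
    # while anything the grammar cannot represent falls back to exp.Command:
    #
    #     >>> import sqlglot
    #     >>> e = sqlglot.parse_one("GRANT SELECT, INSERT ON TABLE t TO ROLE analyst")
    #     >>> [p.this.name for p in e.args["privileges"]]
    #     ['SELECT', 'INSERT']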
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
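    # The Parser is rarely constructed by hand; a Dialect pairs its Tokenizer
    # with its Parser subclass. Driving it directly looks roughly like this
    # (illustrative):
    #
    #     >>> from sqlglot.dialects.dialect import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> tokens = dialect.tokenize("SELECT 1")
    #     >>> expressions = dialect.parser().parse(tokens, "SELECT 1")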
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
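    # FUNCTIONS maps an uppercase function name to a builder, so known
    # functions parse into rich nodes instead of a generic exp.Anonymous
    # (illustrative):
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("SELECT LOG(2, 8)").selects[0]).__name__
    #     'Log'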
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }
    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()
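    # The operator tables above drive precedence climbing via _parse_tokens:
    # FACTOR (e.g. *) binds tighter than TERM (e.g. +), so arithmetic nests as
    # expected (illustrative):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("1 + 2 * 3")
    #     # -> exp.Add(this=Literal(1), expression=exp.Mul(Literal(2), Literal(3)))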
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
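    # COLUMN_OPERATORS give postfix/infix column operators their meaning, e.g.
    # :: becomes a cast and -> becomes a JSON extraction (illustrative):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT a::INT, b -> '$.x'").selects
    #     # -> [exp.Cast(...), exp.JSONExtract(...)]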
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
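    # STATEMENT_PARSERS is the top-level dispatch table, keyed on the first
    # significant token of a statement (illustrative):
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("TRUNCATE TABLE t")).__name__
    #     'TruncateTable'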
self._match_set(self.ID_VAR_TOKENS) 879 else None 880 ), 881 } 882 883 RANGE_PARSERS = { 884 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 885 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 886 TokenType.GLOB: binary_range_parser(exp.Glob), 887 TokenType.ILIKE: binary_range_parser(exp.ILike), 888 TokenType.IN: lambda self, this: self._parse_in(this), 889 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 890 TokenType.IS: lambda self, this: self._parse_is(this), 891 TokenType.LIKE: binary_range_parser(exp.Like), 892 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 893 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 894 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 895 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 896 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 897 } 898 899 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 900 "ALLOWED_VALUES": lambda self: self.expression( 901 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 902 ), 903 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 904 "AUTO": lambda self: self._parse_auto_property(), 905 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 906 "BACKUP": lambda self: self.expression( 907 exp.BackupProperty, this=self._parse_var(any_token=True) 908 ), 909 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 910 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 911 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 912 "CHECKSUM": lambda self: self._parse_checksum(), 913 "CLUSTER BY": lambda self: self._parse_cluster(), 914 "CLUSTERED": lambda self: self._parse_clustered_by(), 915 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 916 exp.CollateProperty, **kwargs 917 ), 918 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 919 "CONTAINS": lambda self: self._parse_contains_property(), 920 "COPY": lambda self: self._parse_copy_property(), 921 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 922 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 923 "DEFINER": lambda self: self._parse_definer(), 924 "DETERMINISTIC": lambda self: self.expression( 925 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 926 ), 927 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 928 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 929 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 930 "DISTKEY": lambda self: self._parse_distkey(), 931 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 932 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 933 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 934 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 935 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 936 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 937 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 938 "FREESPACE": lambda self: self._parse_freespace(), 939 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 940 "HEAP": lambda self: self.expression(exp.HeapProperty), 941 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 942 "IMMUTABLE": lambda self: 
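        # Stability markers are normalized onto exp.StabilityProperty: this
        # entry makes Postgres-style CREATE FUNCTION ... IMMUTABLE produce the
        # same node as the DETERMINISTIC entry above, which is what lets the
        # two spellings transpile into one another.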
self.expression( 943 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 944 ), 945 "INHERITS": lambda self: self.expression( 946 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 947 ), 948 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 949 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 950 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 951 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 952 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 953 "LIKE": lambda self: self._parse_create_like(), 954 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 955 "LOCK": lambda self: self._parse_locking(), 956 "LOCKING": lambda self: self._parse_locking(), 957 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 958 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 959 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 960 "MODIFIES": lambda self: self._parse_modifies_property(), 961 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 962 "NO": lambda self: self._parse_no_property(), 963 "ON": lambda self: self._parse_on_property(), 964 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 965 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 966 "PARTITION": lambda self: self._parse_partitioned_of(), 967 "PARTITION BY": lambda self: self._parse_partitioned_by(), 968 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 969 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 970 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 971 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 972 "READS": lambda self: self._parse_reads_property(), 973 "REMOTE": lambda self: self._parse_remote_with_connection(), 974 "RETURNS": lambda self: self._parse_returns(), 975 "STRICT": lambda self: self.expression(exp.StrictProperty), 976 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 977 "ROW": lambda self: self._parse_row(), 978 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 979 "SAMPLE": lambda self: self.expression( 980 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 981 ), 982 "SECURE": lambda self: self.expression(exp.SecureProperty), 983 "SECURITY": lambda self: self._parse_security(), 984 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 985 "SETTINGS": lambda self: self._parse_settings_property(), 986 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 987 "SORTKEY": lambda self: self._parse_sortkey(), 988 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 989 "STABLE": lambda self: self.expression( 990 exp.StabilityProperty, this=exp.Literal.string("STABLE") 991 ), 992 "STORED": lambda self: self._parse_stored(), 993 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 994 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 995 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 996 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 997 "TO": lambda self: self._parse_to_table(), 998 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 999 "TRANSFORM": lambda self: self.expression( 1000 exp.TransformModelProperty, 
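            # Used for model DDL in the style of BigQuery's
            # CREATE MODEL ... TRANSFORM(expr AS alias, ...) (the dialect
            # attribution is an assumption); the wrapped CSV parsed below
            # becomes the property's expressions.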
expressions=self._parse_wrapped_csv(self._parse_expression) 1001 ), 1002 "TTL": lambda self: self._parse_ttl(), 1003 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1004 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1005 "VOLATILE": lambda self: self._parse_volatile_property(), 1006 "WITH": lambda self: self._parse_with_property(), 1007 } 1008 1009 CONSTRAINT_PARSERS = { 1010 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1011 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1012 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1013 "CHARACTER SET": lambda self: self.expression( 1014 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1015 ), 1016 "CHECK": lambda self: self.expression( 1017 exp.CheckColumnConstraint, 1018 this=self._parse_wrapped(self._parse_assignment), 1019 enforced=self._match_text_seq("ENFORCED"), 1020 ), 1021 "COLLATE": lambda self: self.expression( 1022 exp.CollateColumnConstraint, 1023 this=self._parse_identifier() or self._parse_column(), 1024 ), 1025 "COMMENT": lambda self: self.expression( 1026 exp.CommentColumnConstraint, this=self._parse_string() 1027 ), 1028 "COMPRESS": lambda self: self._parse_compress(), 1029 "CLUSTERED": lambda self: self.expression( 1030 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1031 ), 1032 "NONCLUSTERED": lambda self: self.expression( 1033 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1034 ), 1035 "DEFAULT": lambda self: self.expression( 1036 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1037 ), 1038 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1039 "EPHEMERAL": lambda self: self.expression( 1040 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1041 ), 1042 "EXCLUDE": lambda self: self.expression( 1043 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1044 ), 1045 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1046 "FORMAT": lambda self: self.expression( 1047 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1048 ), 1049 "GENERATED": lambda self: self._parse_generated_as_identity(), 1050 "IDENTITY": lambda self: self._parse_auto_increment(), 1051 "INLINE": lambda self: self._parse_inline(), 1052 "LIKE": lambda self: self._parse_create_like(), 1053 "NOT": lambda self: self._parse_not_constraint(), 1054 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1055 "ON": lambda self: ( 1056 self._match(TokenType.UPDATE) 1057 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1058 ) 1059 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1060 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1061 "PERIOD": lambda self: self._parse_period_for_system_time(), 1062 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1063 "REFERENCES": lambda self: self._parse_references(match=False), 1064 "TITLE": lambda self: self.expression( 1065 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1066 ), 1067 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1068 "UNIQUE": lambda self: self._parse_unique(), 1069 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1070 "WATERMARK": lambda self: self.expression( 1071 exp.WatermarkColumnConstraint, 1072 
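            # Streaming-style watermark constraint, e.g. Flink's
            # WATERMARK FOR ts AS ts - INTERVAL '5' SECOND (the dialect
            # attribution is an assumption): FOR binds the column parsed
            # below, AS binds the delay expression.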
this=self._match(TokenType.FOR) and self._parse_column(), 1073 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1074 ), 1075 "WITH": lambda self: self.expression( 1076 exp.Properties, expressions=self._parse_wrapped_properties() 1077 ), 1078 } 1079 1080 ALTER_PARSERS = { 1081 "ADD": lambda self: self._parse_alter_table_add(), 1082 "AS": lambda self: self._parse_select(), 1083 "ALTER": lambda self: self._parse_alter_table_alter(), 1084 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1085 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1086 "DROP": lambda self: self._parse_alter_table_drop(), 1087 "RENAME": lambda self: self._parse_alter_table_rename(), 1088 "SET": lambda self: self._parse_alter_table_set(), 1089 "SWAP": lambda self: self.expression( 1090 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1091 ), 1092 } 1093 1094 ALTER_ALTER_PARSERS = { 1095 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1096 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1097 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1098 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1099 } 1100 1101 SCHEMA_UNNAMED_CONSTRAINTS = { 1102 "CHECK", 1103 "EXCLUDE", 1104 "FOREIGN KEY", 1105 "LIKE", 1106 "PERIOD", 1107 "PRIMARY KEY", 1108 "UNIQUE", 1109 "WATERMARK", 1110 } 1111 1112 NO_PAREN_FUNCTION_PARSERS = { 1113 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1114 "CASE": lambda self: self._parse_case(), 1115 "CONNECT_BY_ROOT": lambda self: self.expression( 1116 exp.ConnectByRoot, this=self._parse_column() 1117 ), 1118 "IF": lambda self: self._parse_if(), 1119 } 1120 1121 INVALID_FUNC_NAME_TOKENS = { 1122 TokenType.IDENTIFIER, 1123 TokenType.STRING, 1124 } 1125 1126 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1127 1128 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1129 1130 FUNCTION_PARSERS = { 1131 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1132 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1133 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1134 "DECODE": lambda self: self._parse_decode(), 1135 "EXTRACT": lambda self: self._parse_extract(), 1136 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1137 "GAP_FILL": lambda self: self._parse_gap_fill(), 1138 "JSON_OBJECT": lambda self: self._parse_json_object(), 1139 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1140 "JSON_TABLE": lambda self: self._parse_json_table(), 1141 "MATCH": lambda self: self._parse_match_against(), 1142 "NORMALIZE": lambda self: self._parse_normalize(), 1143 "OPENJSON": lambda self: self._parse_open_json(), 1144 "OVERLAY": lambda self: self._parse_overlay(), 1145 "POSITION": lambda self: self._parse_position(), 1146 "PREDICT": lambda self: self._parse_predict(), 1147 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1148 "STRING_AGG": lambda self: self._parse_string_agg(), 1149 "SUBSTRING": lambda self: self._parse_substring(), 1150 "TRIM": lambda self: self._parse_trim(), 1151 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1152 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1153 "XMLELEMENT": lambda self: self.expression( 1154 exp.XMLElement, 1155 this=self._match_text_seq("NAME") and self._parse_id_var(), 1156 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1157 ), 1158 "XMLTABLE": lambda self: 
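        # FUNCTION_PARSERS covers functions whose argument syntax is not a
        # plain comma-separated list and therefore needs bespoke handling,
        # e.g. CAST(x AS INT), EXTRACT(YEAR FROM d), TRIM(BOTH 'x' FROM y);
        # functions not listed here go through generic argument parsing.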
            self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}
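    # The OPTIONS_TYPE tables above and below map a leading keyword to the
    # word sequences that may follow it; _parse_var_from_options walks these
    # to match multi-word options. An illustrative (not exhaustive) sketch of
    # what TRANSACTION_CHARACTERISTICS accepts:
    #
    #     SET TRANSACTION ISOLATION LEVEL READ COMMITTED
    #     SET TRANSACTION READ ONLY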
USABLES: OPTIONS_TYPE = dict.fromkeys( 1252 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1253 ) 1254 1255 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1256 1257 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1258 "TYPE": ("EVOLUTION",), 1259 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1260 } 1261 1262 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1263 1264 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1265 1266 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1267 "NOT": ("ENFORCED",), 1268 "MATCH": ( 1269 "FULL", 1270 "PARTIAL", 1271 "SIMPLE", 1272 ), 1273 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1274 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1275 } 1276 1277 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1278 1279 CLONE_KEYWORDS = {"CLONE", "COPY"} 1280 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1281 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1282 1283 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1284 1285 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1286 1287 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1288 1289 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1290 1291 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1292 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1293 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1294 1295 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1296 1297 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1298 1299 ADD_CONSTRAINT_TOKENS = { 1300 TokenType.CONSTRAINT, 1301 TokenType.FOREIGN_KEY, 1302 TokenType.INDEX, 1303 TokenType.KEY, 1304 TokenType.PRIMARY_KEY, 1305 TokenType.UNIQUE, 1306 } 1307 1308 DISTINCT_TOKENS = {TokenType.DISTINCT} 1309 1310 NULL_TOKENS = {TokenType.NULL} 1311 1312 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1313 1314 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1315 1316 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1317 1318 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1319 1320 ODBC_DATETIME_LITERALS = { 1321 "d": exp.Date, 1322 "t": exp.Time, 1323 "ts": exp.Timestamp, 1324 } 1325 1326 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1327 1328 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1329 1330 # The style options for the DESCRIBE statement 1331 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1332 1333 # The style options for the ANALYZE statement 1334 ANALYZE_STYLES = { 1335 "BUFFER_USAGE_LIMIT", 1336 "FULL", 1337 "LOCAL", 1338 "NO_WRITE_TO_BINLOG", 1339 "SAMPLE", 1340 "SKIP_LOCKED", 1341 "VERBOSE", 1342 } 1343 1344 ANALYZE_EXPRESSION_PARSERS = { 1345 "ALL": lambda self: self._parse_analyze_columns(), 1346 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1347 "DELETE": lambda self: self._parse_analyze_delete(), 1348 "DROP": lambda self: self._parse_analyze_histogram(), 1349 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1350 "LIST": lambda self: self._parse_analyze_list(), 1351 "PREDICATE": lambda self: self._parse_analyze_columns(), 1352 "UPDATE": lambda self: self._parse_analyze_histogram(), 1353 "VALIDATE": lambda self: self._parse_analyze_validate(), 1354 } 1355 1356 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1357 
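    # The class-level constants from here down act as dialect knobs: a
    # dialect's parser subclass overrides them instead of re-implementing the
    # parse methods. A minimal sketch (the subclass and flag choices are
    # illustrative only):
    #
    #     class MyParser(Parser):
    #         STRICT_CAST = False  # '::' now builds exp.TryCast, per COLUMN_OPERATORS
    #         LOG_DEFAULTS_TO_LN = True  # single-arg LOG() parses as exp.Ln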
1358 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1359 1360 OPERATION_MODIFIERS: t.Set[str] = set() 1361 1362 STRICT_CAST = True 1363 1364 PREFIXED_PIVOT_COLUMNS = False 1365 IDENTIFY_PIVOT_STRINGS = False 1366 1367 LOG_DEFAULTS_TO_LN = False 1368 1369 # Whether ADD is present for each column added by ALTER TABLE 1370 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1371 1372 # Whether the table sample clause expects CSV syntax 1373 TABLESAMPLE_CSV = False 1374 1375 # The default method used for table sampling 1376 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1377 1378 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1379 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1380 1381 # Whether the TRIM function expects the characters to trim as its first argument 1382 TRIM_PATTERN_FIRST = False 1383 1384 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1385 STRING_ALIASES = False 1386 1387 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1388 MODIFIERS_ATTACHED_TO_SET_OP = True 1389 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1390 1391 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1392 NO_PAREN_IF_COMMANDS = True 1393 1394 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1395 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1396 1397 # Whether the `:` operator is used to extract a value from a VARIANT column 1398 COLON_IS_VARIANT_EXTRACT = False 1399 1400 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1401 # If this is True and '(' is not found, the keyword will be treated as an identifier 1402 VALUES_FOLLOWED_BY_PAREN = True 1403 1404 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1405 SUPPORTS_IMPLICIT_UNNEST = False 1406 1407 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1408 INTERVAL_SPANS = True 1409 1410 # Whether a PARTITION clause can follow a table reference 1411 SUPPORTS_PARTITION_SELECTION = False 1412 1413 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1414 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1415 1416 # Whether the 'AS' keyword is optional in the CTE definition syntax 1417 OPTIONAL_ALIAS_TOKEN_CTE = False 1418 1419 __slots__ = ( 1420 "error_level", 1421 "error_message_context", 1422 "max_errors", 1423 "dialect", 1424 "sql", 1425 "errors", 1426 "_tokens", 1427 "_index", 1428 "_curr", 1429 "_next", 1430 "_prev", 1431 "_prev_comments", 1432 ) 1433 1434 # Autofilled 1435 SHOW_TRIE: t.Dict = {} 1436 SET_TRIE: t.Dict = {} 1437 1438 def __init__( 1439 self, 1440 error_level: t.Optional[ErrorLevel] = None, 1441 error_message_context: int = 100, 1442 max_errors: int = 3, 1443 dialect: DialectType = None, 1444 ): 1445 from sqlglot.dialects import Dialect 1446 1447 self.error_level = error_level or ErrorLevel.IMMEDIATE 1448 self.error_message_context = error_message_context 1449 self.max_errors = max_errors 1450 self.dialect = Dialect.get_or_raise(dialect) 1451 self.reset() 1452 1453 def reset(self): 1454 self.sql = "" 1455 self.errors = [] 1456 self._tokens = [] 1457 self._index = 0 1458 self._curr = None 1459 self._next = None 1460 self._prev = None 1461 self._prev_comments = None 1462 1463 def parse( 1464 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1465 ) -> t.List[t.Optional[exp.Expression]]: 1466 """ 1467 Parses a list of tokens and returns a list of syntax trees, one tree 1468 per parsed SQL statement. 1469 1470 Args: 1471 raw_tokens: The list of tokens. 1472 sql: The original SQL string, used to produce helpful debug messages. 1473 1474 Returns: 1475 The list of the produced syntax trees. 1476 """ 1477 return self._parse( 1478 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1479 ) 1480 1481 def parse_into( 1482 self, 1483 expression_types: exp.IntoType, 1484 raw_tokens: t.List[Token], 1485 sql: t.Optional[str] = None, 1486 ) -> t.List[t.Optional[exp.Expression]]: 1487 """ 1488 Parses a list of tokens into a given Expression type. If a collection of Expression 1489 types is given instead, this method will try to parse the token list into each one 1490 of them, stopping at the first for which the parsing succeeds. 1491 1492 Args: 1493 expression_types: The expression type(s) to try and parse the token list into. 1494 raw_tokens: The list of tokens. 1495 sql: The original SQL string, used to produce helpful debug messages. 1496 1497 Returns: 1498 The target Expression. 
1499 """ 1500 errors = [] 1501 for expression_type in ensure_list(expression_types): 1502 parser = self.EXPRESSION_PARSERS.get(expression_type) 1503 if not parser: 1504 raise TypeError(f"No parser registered for {expression_type}") 1505 1506 try: 1507 return self._parse(parser, raw_tokens, sql) 1508 except ParseError as e: 1509 e.errors[0]["into_expression"] = expression_type 1510 errors.append(e) 1511 1512 raise ParseError( 1513 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1514 errors=merge_errors(errors), 1515 ) from errors[-1] 1516 1517 def _parse( 1518 self, 1519 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1520 raw_tokens: t.List[Token], 1521 sql: t.Optional[str] = None, 1522 ) -> t.List[t.Optional[exp.Expression]]: 1523 self.reset() 1524 self.sql = sql or "" 1525 1526 total = len(raw_tokens) 1527 chunks: t.List[t.List[Token]] = [[]] 1528 1529 for i, token in enumerate(raw_tokens): 1530 if token.token_type == TokenType.SEMICOLON: 1531 if token.comments: 1532 chunks.append([token]) 1533 1534 if i < total - 1: 1535 chunks.append([]) 1536 else: 1537 chunks[-1].append(token) 1538 1539 expressions = [] 1540 1541 for tokens in chunks: 1542 self._index = -1 1543 self._tokens = tokens 1544 self._advance() 1545 1546 expressions.append(parse_method(self)) 1547 1548 if self._index < len(self._tokens): 1549 self.raise_error("Invalid expression / Unexpected token") 1550 1551 self.check_errors() 1552 1553 return expressions 1554 1555 def check_errors(self) -> None: 1556 """Logs or raises any found errors, depending on the chosen error level setting.""" 1557 if self.error_level == ErrorLevel.WARN: 1558 for error in self.errors: 1559 logger.error(str(error)) 1560 elif self.error_level == ErrorLevel.RAISE and self.errors: 1561 raise ParseError( 1562 concat_messages(self.errors, self.max_errors), 1563 errors=merge_errors(self.errors), 1564 ) 1565 1566 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1567 """ 1568 Appends an error in the list of recorded errors or raises it, depending on the chosen 1569 error level setting. 1570 """ 1571 token = token or self._curr or self._prev or Token.string("") 1572 start = token.start 1573 end = token.end + 1 1574 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1575 highlight = self.sql[start:end] 1576 end_context = self.sql[end : end + self.error_message_context] 1577 1578 error = ParseError.new( 1579 f"{message}. Line {token.line}, Col: {token.col}.\n" 1580 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1581 description=message, 1582 line=token.line, 1583 col=token.col, 1584 start_context=start_context, 1585 highlight=highlight, 1586 end_context=end_context, 1587 ) 1588 1589 if self.error_level == ErrorLevel.IMMEDIATE: 1590 raise error 1591 1592 self.errors.append(error) 1593 1594 def expression( 1595 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1596 ) -> E: 1597 """ 1598 Creates a new, validated Expression. 1599 1600 Args: 1601 exp_class: The expression class to instantiate. 1602 comments: An optional list of comments to attach to the expression. 1603 kwargs: The arguments to set for the expression along with their respective values. 1604 1605 Returns: 1606 The target expression. 
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse method that contains a try/except internally raises an
        error. This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims
        to solve this by setting and resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
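    # _parse_statement is the entry point each chunk of tokens goes through.
    # A minimal sketch of driving the parser by hand, assuming the default
    # dialect (sqlglot.parse_one wraps this same flow):
    #
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> from sqlglot.parser import Parser
    #     >>> tokens = Tokenizer().tokenize("SELECT 1; DROP TABLE t")
    #     >>> [type(e).__name__ for e in Parser().parse(tokens)]
    #     ['Select', 'Drop']
    #
    # _parse splits the token stream on semicolons, hence two trees.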
1789 1790 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1791 start = self._prev 1792 temporary = self._match(TokenType.TEMPORARY) 1793 materialized = self._match_text_seq("MATERIALIZED") 1794 1795 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1796 if not kind: 1797 return self._parse_as_command(start) 1798 1799 concurrently = self._match_text_seq("CONCURRENTLY") 1800 if_exists = exists or self._parse_exists() 1801 1802 if kind == "COLUMN": 1803 this = self._parse_column() 1804 else: 1805 this = self._parse_table_parts( 1806 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1807 ) 1808 1809 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1810 1811 if self._match(TokenType.L_PAREN, advance=False): 1812 expressions = self._parse_wrapped_csv(self._parse_types) 1813 else: 1814 expressions = None 1815 1816 return self.expression( 1817 exp.Drop, 1818 exists=if_exists, 1819 this=this, 1820 expressions=expressions, 1821 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1822 temporary=temporary, 1823 materialized=materialized, 1824 cascade=self._match_text_seq("CASCADE"), 1825 constraints=self._match_text_seq("CONSTRAINTS"), 1826 purge=self._match_text_seq("PURGE"), 1827 cluster=cluster, 1828 concurrently=concurrently, 1829 ) 1830 1831 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1832 return ( 1833 self._match_text_seq("IF") 1834 and (not not_ or self._match(TokenType.NOT)) 1835 and self._match(TokenType.EXISTS) 1836 ) 1837 1838 def _parse_create(self) -> exp.Create | exp.Command: 1839 # Note: this can't be None because we've matched a statement parser 1840 start = self._prev 1841 1842 replace = ( 1843 start.token_type == TokenType.REPLACE 1844 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1845 or self._match_pair(TokenType.OR, TokenType.ALTER) 1846 ) 1847 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1848 1849 unique = self._match(TokenType.UNIQUE) 1850 1851 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1852 clustered = True 1853 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1854 "COLUMNSTORE" 1855 ): 1856 clustered = False 1857 else: 1858 clustered = None 1859 1860 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1861 self._advance() 1862 1863 properties = None 1864 create_token = self._match_set(self.CREATABLES) and self._prev 1865 1866 if not create_token: 1867 # exp.Properties.Location.POST_CREATE 1868 properties = self._parse_properties() 1869 create_token = self._match_set(self.CREATABLES) and self._prev 1870 1871 if not properties or not create_token: 1872 return self._parse_as_command(start) 1873 1874 concurrently = self._match_text_seq("CONCURRENTLY") 1875 exists = self._parse_exists(not_=True) 1876 this = None 1877 expression: t.Optional[exp.Expression] = None 1878 indexes = None 1879 no_schema_binding = None 1880 begin = None 1881 end = None 1882 clone = None 1883 1884 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1885 nonlocal properties 1886 if properties and temp_props: 1887 properties.expressions.extend(temp_props.expressions) 1888 elif temp_props: 1889 properties = temp_props 1890 1891 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1892 this = self._parse_user_defined_function(kind=create_token.token_type) 1893 1894 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1895 
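            # extend_props (defined above) folds each newly parsed batch into
            # a single exp.Properties node, since CREATE allows properties at
            # several locations (POST_CREATE, POST_SCHEMA, POST_ALIAS, ...).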
extend_props(self._parse_properties()) 1896 1897 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1898 extend_props(self._parse_properties()) 1899 1900 if not expression: 1901 if self._match(TokenType.COMMAND): 1902 expression = self._parse_as_command(self._prev) 1903 else: 1904 begin = self._match(TokenType.BEGIN) 1905 return_ = self._match_text_seq("RETURN") 1906 1907 if self._match(TokenType.STRING, advance=False): 1908 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1909 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1910 expression = self._parse_string() 1911 extend_props(self._parse_properties()) 1912 else: 1913 expression = self._parse_user_defined_function_expression() 1914 1915 end = self._match_text_seq("END") 1916 1917 if return_: 1918 expression = self.expression(exp.Return, this=expression) 1919 elif create_token.token_type == TokenType.INDEX: 1920 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1921 if not self._match(TokenType.ON): 1922 index = self._parse_id_var() 1923 anonymous = False 1924 else: 1925 index = None 1926 anonymous = True 1927 1928 this = self._parse_index(index=index, anonymous=anonymous) 1929 elif create_token.token_type in self.DB_CREATABLES: 1930 table_parts = self._parse_table_parts( 1931 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1932 ) 1933 1934 # exp.Properties.Location.POST_NAME 1935 self._match(TokenType.COMMA) 1936 extend_props(self._parse_properties(before=True)) 1937 1938 this = self._parse_schema(this=table_parts) 1939 1940 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1941 extend_props(self._parse_properties()) 1942 1943 self._match(TokenType.ALIAS) 1944 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1945 # exp.Properties.Location.POST_ALIAS 1946 extend_props(self._parse_properties()) 1947 1948 if create_token.token_type == TokenType.SEQUENCE: 1949 expression = self._parse_types() 1950 extend_props(self._parse_properties()) 1951 else: 1952 expression = self._parse_ddl_select() 1953 1954 if create_token.token_type == TokenType.TABLE: 1955 # exp.Properties.Location.POST_EXPRESSION 1956 extend_props(self._parse_properties()) 1957 1958 indexes = [] 1959 while True: 1960 index = self._parse_index() 1961 1962 # exp.Properties.Location.POST_INDEX 1963 extend_props(self._parse_properties()) 1964 if not index: 1965 break 1966 else: 1967 self._match(TokenType.COMMA) 1968 indexes.append(index) 1969 elif create_token.token_type == TokenType.VIEW: 1970 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1971 no_schema_binding = True 1972 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1973 extend_props(self._parse_properties()) 1974 1975 shallow = self._match_text_seq("SHALLOW") 1976 1977 if self._match_texts(self.CLONE_KEYWORDS): 1978 copy = self._prev.text.lower() == "copy" 1979 clone = self.expression( 1980 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1981 ) 1982 1983 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1984 return self._parse_as_command(start) 1985 1986 create_kind_text = create_token.text.upper() 1987 return self.expression( 1988 exp.Create, 1989 this=this, 1990 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1991 replace=replace, 1992 refresh=refresh, 1993 unique=unique, 1994 expression=expression, 
1995 exists=exists, 1996 properties=properties, 1997 indexes=indexes, 1998 no_schema_binding=no_schema_binding, 1999 begin=begin, 2000 end=end, 2001 clone=clone, 2002 concurrently=concurrently, 2003 clustered=clustered, 2004 ) 2005 2006 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2007 seq = exp.SequenceProperties() 2008 2009 options = [] 2010 index = self._index 2011 2012 while self._curr: 2013 self._match(TokenType.COMMA) 2014 if self._match_text_seq("INCREMENT"): 2015 self._match_text_seq("BY") 2016 self._match_text_seq("=") 2017 seq.set("increment", self._parse_term()) 2018 elif self._match_text_seq("MINVALUE"): 2019 seq.set("minvalue", self._parse_term()) 2020 elif self._match_text_seq("MAXVALUE"): 2021 seq.set("maxvalue", self._parse_term()) 2022 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2023 self._match_text_seq("=") 2024 seq.set("start", self._parse_term()) 2025 elif self._match_text_seq("CACHE"): 2026 # T-SQL allows empty CACHE which is initialized dynamically 2027 seq.set("cache", self._parse_number() or True) 2028 elif self._match_text_seq("OWNED", "BY"): 2029 # "OWNED BY NONE" is the default 2030 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2031 else: 2032 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2033 if opt: 2034 options.append(opt) 2035 else: 2036 break 2037 2038 seq.set("options", options if options else None) 2039 return None if self._index == index else seq 2040 2041 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2042 # only used for teradata currently 2043 self._match(TokenType.COMMA) 2044 2045 kwargs = { 2046 "no": self._match_text_seq("NO"), 2047 "dual": self._match_text_seq("DUAL"), 2048 "before": self._match_text_seq("BEFORE"), 2049 "default": self._match_text_seq("DEFAULT"), 2050 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2051 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2052 "after": self._match_text_seq("AFTER"), 2053 "minimum": self._match_texts(("MIN", "MINIMUM")), 2054 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2055 } 2056 2057 if self._match_texts(self.PROPERTY_PARSERS): 2058 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2059 try: 2060 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2061 except TypeError: 2062 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2063 2064 return None 2065 2066 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2067 return self._parse_wrapped_csv(self._parse_property) 2068 2069 def _parse_property(self) -> t.Optional[exp.Expression]: 2070 if self._match_texts(self.PROPERTY_PARSERS): 2071 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2072 2073 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2074 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2075 2076 if self._match_text_seq("COMPOUND", "SORTKEY"): 2077 return self._parse_sortkey(compound=True) 2078 2079 if self._match_text_seq("SQL", "SECURITY"): 2080 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2081 2082 index = self._index 2083 key = self._parse_column() 2084 2085 if not self._match(TokenType.EQ): 2086 self._retreat(index) 2087 return self._parse_sequence_properties() 2088 2089 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2090 if isinstance(key, exp.Column): 2091 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2092 2093 value = self._parse_bitwise() or self._parse_var(any_token=True) 2094 2095 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2096 if isinstance(value, exp.Column): 2097 value = exp.var(value.name) 2098 2099 return self.expression(exp.Property, this=key, value=value) 2100 2101 def _parse_stored(self) -> exp.FileFormatProperty: 2102 self._match(TokenType.ALIAS) 2103 2104 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2105 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2106 2107 return self.expression( 2108 exp.FileFormatProperty, 2109 this=( 2110 self.expression( 2111 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2112 ) 2113 if input_format or output_format 2114 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2115 ), 2116 ) 2117 2118 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2119 field = self._parse_field() 2120 if isinstance(field, exp.Identifier) and not field.quoted: 2121 field = exp.var(field) 2122 2123 return field 2124 2125 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2126 self._match(TokenType.EQ) 2127 self._match(TokenType.ALIAS) 2128 2129 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2130 2131 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2132 properties = [] 2133 while True: 2134 if before: 2135 prop = self._parse_property_before() 2136 else: 2137 prop = self._parse_property() 2138 if not prop: 2139 break 2140 for p in ensure_list(prop): 2141 properties.append(p) 2142 2143 if properties: 2144 return self.expression(exp.Properties, expressions=properties) 2145 2146 return None 2147 2148 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2149 return self.expression( 2150 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2151 ) 2152 2153 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2154 if self._match_texts(("DEFINER", "INVOKER")): 2155 security_specifier = self._prev.text.upper() 2156 return self.expression(exp.SecurityProperty, this=security_specifier) 2157 return None 2158 2159 def _parse_settings_property(self) -> exp.SettingsProperty: 2160 return self.expression( 2161 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2162 ) 2163 2164 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2165 if self._index >= 2: 2166 pre_volatile_token = self._tokens[self._index - 2] 2167 else: 2168 pre_volatile_token = None 2169 2170 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2171 return exp.VolatileProperty() 2172 2173 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2174 2175 def _parse_retention_period(self) -> exp.Var: 2176 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2177 number = self._parse_number() 2178 number_str = f"{number} " if number else "" 2179 unit = self._parse_var(any_token=True) 2180 return exp.var(f"{number_str}{unit}") 2181 2182 def _parse_system_versioning_property( 2183 self, with_: bool = False 2184 ) -> exp.WithSystemVersioningProperty: 2185 self._match(TokenType.EQ) 2186 prop = self.expression( 2187 exp.WithSystemVersioningProperty, 2188 **{ # type: ignore 2189 "on": 
True, 2190 "with": with_, 2191 }, 2192 ) 2193 2194 if self._match_text_seq("OFF"): 2195 prop.set("on", False) 2196 return prop 2197 2198 self._match(TokenType.ON) 2199 if self._match(TokenType.L_PAREN): 2200 while self._curr and not self._match(TokenType.R_PAREN): 2201 if self._match_text_seq("HISTORY_TABLE", "="): 2202 prop.set("this", self._parse_table_parts()) 2203 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2204 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2205 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2206 prop.set("retention_period", self._parse_retention_period()) 2207 2208 self._match(TokenType.COMMA) 2209 2210 return prop 2211 2212 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2213 self._match(TokenType.EQ) 2214 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2215 prop = self.expression(exp.DataDeletionProperty, on=on) 2216 2217 if self._match(TokenType.L_PAREN): 2218 while self._curr and not self._match(TokenType.R_PAREN): 2219 if self._match_text_seq("FILTER_COLUMN", "="): 2220 prop.set("filter_column", self._parse_column()) 2221 elif self._match_text_seq("RETENTION_PERIOD", "="): 2222 prop.set("retention_period", self._parse_retention_period()) 2223 2224 self._match(TokenType.COMMA) 2225 2226 return prop 2227 2228 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2229 kind = "HASH" 2230 expressions: t.Optional[t.List[exp.Expression]] = None 2231 if self._match_text_seq("BY", "HASH"): 2232 expressions = self._parse_wrapped_csv(self._parse_id_var) 2233 elif self._match_text_seq("BY", "RANDOM"): 2234 kind = "RANDOM" 2235 2236 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2237 buckets: t.Optional[exp.Expression] = None 2238 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2239 buckets = self._parse_number() 2240 2241 return self.expression( 2242 exp.DistributedByProperty, 2243 expressions=expressions, 2244 kind=kind, 2245 buckets=buckets, 2246 order=self._parse_order(), 2247 ) 2248 2249 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2250 self._match_text_seq("KEY") 2251 expressions = self._parse_wrapped_id_vars() 2252 return self.expression(expr_type, expressions=expressions) 2253 2254 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2255 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2256 prop = self._parse_system_versioning_property(with_=True) 2257 self._match_r_paren() 2258 return prop 2259 2260 if self._match(TokenType.L_PAREN, advance=False): 2261 return self._parse_wrapped_properties() 2262 2263 if self._match_text_seq("JOURNAL"): 2264 return self._parse_withjournaltable() 2265 2266 if self._match_texts(self.VIEW_ATTRIBUTES): 2267 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2268 2269 if self._match_text_seq("DATA"): 2270 return self._parse_withdata(no=False) 2271 elif self._match_text_seq("NO", "DATA"): 2272 return self._parse_withdata(no=True) 2273 2274 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2275 return self._parse_serde_properties(with_=True) 2276 2277 if self._match(TokenType.SCHEMA): 2278 return self.expression( 2279 exp.WithSchemaBindingProperty, 2280 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2281 ) 2282 2283 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2284 return self.expression( 2285 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2286 ) 2287 2288 if not self._next: 2289 return None 2290 2291 return self._parse_withisolatedloading() 2292 2293 def _parse_procedure_option(self) -> exp.Expression | None: 2294 if self._match_text_seq("EXECUTE", "AS"): 2295 return self.expression( 2296 exp.ExecuteAsProperty, 2297 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2298 or self._parse_string(), 2299 ) 2300 2301 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2302 2303 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2304 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2305 self._match(TokenType.EQ) 2306 2307 user = self._parse_id_var() 2308 self._match(TokenType.PARAMETER) 2309 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2310 2311 if not user or not host: 2312 return None 2313 2314 return exp.DefinerProperty(this=f"{user}@{host}") 2315 2316 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2317 self._match(TokenType.TABLE) 2318 self._match(TokenType.EQ) 2319 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2320 2321 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2322 return self.expression(exp.LogProperty, no=no) 2323 2324 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2325 return self.expression(exp.JournalProperty, **kwargs) 2326 2327 def _parse_checksum(self) -> exp.ChecksumProperty: 2328 self._match(TokenType.EQ) 2329 2330 on = None 2331 if self._match(TokenType.ON): 2332 on = True 2333 elif self._match_text_seq("OFF"): 2334 on = False 2335 2336 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2337 2338 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2339 return self.expression( 2340 exp.Cluster, 2341 expressions=( 2342 self._parse_wrapped_csv(self._parse_ordered) 2343 if wrapped 2344 else self._parse_csv(self._parse_ordered) 2345 ), 2346 ) 2347 2348 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2349 self._match_text_seq("BY") 2350 2351 self._match_l_paren() 2352 expressions = self._parse_csv(self._parse_column) 2353 self._match_r_paren() 2354 2355 if self._match_text_seq("SORTED", "BY"): 2356 self._match_l_paren() 2357 sorted_by = self._parse_csv(self._parse_ordered) 2358 self._match_r_paren() 2359 else: 2360 sorted_by = None 2361 2362 self._match(TokenType.INTO) 2363 buckets = self._parse_number() 2364 self._match_text_seq("BUCKETS") 2365 2366 return self.expression( 2367 exp.ClusteredByProperty, 2368 expressions=expressions, 2369 sorted_by=sorted_by, 2370 buckets=buckets, 2371 ) 2372 2373 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2374 if not self._match_text_seq("GRANTS"): 2375 self._retreat(self._index - 1) 2376 return None 2377 2378 return self.expression(exp.CopyGrantsProperty) 2379 2380 def _parse_freespace(self) -> exp.FreespaceProperty: 2381 self._match(TokenType.EQ) 2382 return self.expression( 2383 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2384 ) 2385 2386 def _parse_mergeblockratio( 2387 self, no: bool = False, default: bool = False 2388 ) -> exp.MergeBlockRatioProperty: 2389 if self._match(TokenType.EQ): 2390 return self.expression( 2391 exp.MergeBlockRatioProperty, 2392 this=self._parse_number(), 2393 percent=self._match(TokenType.PERCENT), 2394 ) 2395 2396 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2397 2398 def _parse_datablocksize( 2399 self, 2400 default: t.Optional[bool] = None, 2401 minimum: t.Optional[bool] = None, 2402 maximum: t.Optional[bool] = None, 2403 ) -> exp.DataBlocksizeProperty: 2404 self._match(TokenType.EQ) 2405 size = self._parse_number() 2406 2407 units = None 2408 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2409 units = self._prev.text 2410 2411 return self.expression( 2412 exp.DataBlocksizeProperty, 2413 size=size, 2414 units=units, 2415 default=default, 2416 minimum=minimum, 2417 maximum=maximum, 2418 ) 2419 2420 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2421 self._match(TokenType.EQ) 2422 always = self._match_text_seq("ALWAYS") 2423 manual = self._match_text_seq("MANUAL") 2424 never = self._match_text_seq("NEVER") 2425 default = self._match_text_seq("DEFAULT") 2426 2427 autotemp = None 2428 if self._match_text_seq("AUTOTEMP"): 2429 autotemp = self._parse_schema() 2430 2431 return self.expression( 2432 exp.BlockCompressionProperty, 2433 always=always, 2434 manual=manual, 2435 never=never, 2436 default=default, 2437 autotemp=autotemp, 2438 ) 2439 2440 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2441 index = self._index 2442 no = self._match_text_seq("NO") 2443 concurrent = self._match_text_seq("CONCURRENT") 2444 2445 if not self._match_text_seq("ISOLATED", "LOADING"): 2446 self._retreat(index) 2447 return None 2448 2449 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2450 return self.expression( 2451 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2452 ) 2453 2454 def _parse_locking(self) -> exp.LockingProperty: 2455 if self._match(TokenType.TABLE): 2456 kind = "TABLE" 2457 elif self._match(TokenType.VIEW): 2458 kind = "VIEW" 2459 elif self._match(TokenType.ROW): 2460 kind = "ROW" 2461 elif self._match_text_seq("DATABASE"): 2462 kind = "DATABASE" 2463 else: 2464 kind = None 2465 2466 if kind in ("DATABASE", "TABLE", "VIEW"): 2467 this = self._parse_table_parts() 2468 else: 2469 this = None 2470 2471 if self._match(TokenType.FOR): 2472 for_or_in = "FOR" 2473 elif self._match(TokenType.IN): 2474 for_or_in = "IN" 2475 else: 2476 for_or_in = None 2477 2478 if self._match_text_seq("ACCESS"): 2479 lock_type = "ACCESS" 2480 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2481 lock_type = "EXCLUSIVE" 2482 elif self._match_text_seq("SHARE"): 2483 lock_type = "SHARE" 2484 elif self._match_text_seq("READ"): 2485 lock_type = "READ" 2486 elif self._match_text_seq("WRITE"): 2487 lock_type = "WRITE" 2488 elif self._match_text_seq("CHECKSUM"): 2489 lock_type = "CHECKSUM" 2490 else: 2491 lock_type = None 2492 2493 override = self._match_text_seq("OVERRIDE") 2494 2495 return self.expression( 2496 exp.LockingProperty, 2497 this=this, 2498 kind=kind, 2499 for_or_in=for_or_in, 2500 lock_type=lock_type, 2501 override=override, 2502 ) 2503 2504 def _parse_partition_by(self) -> t.List[exp.Expression]: 2505 if self._match(TokenType.PARTITION_BY): 2506 return self._parse_csv(self._parse_assignment) 2507 return [] 2508 2509 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2510 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2511 if self._match_text_seq("MINVALUE"): 2512 return exp.var("MINVALUE") 2513 if self._match_text_seq("MAXVALUE"): 2514 return exp.var("MAXVALUE") 2515 return self._parse_bitwise() 2516 2517 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2518 expression = None 
2519 from_expressions = None 2520 to_expressions = None 2521 2522 if self._match(TokenType.IN): 2523 this = self._parse_wrapped_csv(self._parse_bitwise) 2524 elif self._match(TokenType.FROM): 2525 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2526 self._match_text_seq("TO") 2527 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2528 elif self._match_text_seq("WITH", "(", "MODULUS"): 2529 this = self._parse_number() 2530 self._match_text_seq(",", "REMAINDER") 2531 expression = self._parse_number() 2532 self._match_r_paren() 2533 else: 2534 self.raise_error("Failed to parse partition bound spec.") 2535 2536 return self.expression( 2537 exp.PartitionBoundSpec, 2538 this=this, 2539 expression=expression, 2540 from_expressions=from_expressions, 2541 to_expressions=to_expressions, 2542 ) 2543 2544 # https://www.postgresql.org/docs/current/sql-createtable.html 2545 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2546 if not self._match_text_seq("OF"): 2547 self._retreat(self._index - 1) 2548 return None 2549 2550 this = self._parse_table(schema=True) 2551 2552 if self._match(TokenType.DEFAULT): 2553 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2554 elif self._match_text_seq("FOR", "VALUES"): 2555 expression = self._parse_partition_bound_spec() 2556 else: 2557 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2558 2559 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2560 2561 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2562 self._match(TokenType.EQ) 2563 return self.expression( 2564 exp.PartitionedByProperty, 2565 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2566 ) 2567 2568 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2569 if self._match_text_seq("AND", "STATISTICS"): 2570 statistics = True 2571 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2572 statistics = False 2573 else: 2574 statistics = None 2575 2576 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2577 2578 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2579 if self._match_text_seq("SQL"): 2580 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2581 return None 2582 2583 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2584 if self._match_text_seq("SQL", "DATA"): 2585 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2586 return None 2587 2588 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2589 if self._match_text_seq("PRIMARY", "INDEX"): 2590 return exp.NoPrimaryIndexProperty() 2591 if self._match_text_seq("SQL"): 2592 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2593 return None 2594 2595 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2596 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2597 return exp.OnCommitProperty() 2598 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2599 return exp.OnCommitProperty(delete=True) 2600 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2601 2602 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2603 if self._match_text_seq("SQL", "DATA"): 2604 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2605 return None 2606 2607 def _parse_distkey(self) -> exp.DistKeyProperty: 2608 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2609 2610 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2611 table = self._parse_table(schema=True) 2612 2613 options = [] 2614 while self._match_texts(("INCLUDING", "EXCLUDING")): 2615 this = self._prev.text.upper() 2616 2617 id_var = self._parse_id_var() 2618 if not id_var: 2619 return None 2620 2621 options.append( 2622 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2623 ) 2624 2625 return self.expression(exp.LikeProperty, this=table, expressions=options) 2626 2627 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2628 return self.expression( 2629 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2630 ) 2631 2632 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2633 self._match(TokenType.EQ) 2634 return self.expression( 2635 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2636 ) 2637 2638 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2639 self._match_text_seq("WITH", "CONNECTION") 2640 return self.expression( 2641 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2642 ) 2643 2644 def _parse_returns(self) -> exp.ReturnsProperty: 2645 value: t.Optional[exp.Expression] 2646 null = None 2647 is_table = self._match(TokenType.TABLE) 2648 2649 if is_table: 2650 if self._match(TokenType.LT): 2651 value = self.expression( 2652 exp.Schema, 2653 this="TABLE", 2654 expressions=self._parse_csv(self._parse_struct_types), 2655 ) 2656 if not self._match(TokenType.GT): 2657 self.raise_error("Expecting >") 2658 else: 2659 value = self._parse_schema(exp.var("TABLE")) 2660 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2661 null = True 2662 value = None 2663 else: 2664 value = self._parse_types() 2665 2666 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2667 2668 def _parse_describe(self) -> exp.Describe: 2669 kind = self._match_set(self.CREATABLES) and self._prev.text 2670 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2671 if self._match(TokenType.DOT): 2672 style = None 2673 self._retreat(self._index - 2) 2674 2675 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2676 2677 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2678 this = self._parse_statement() 2679 else: 2680 this = self._parse_table(schema=True) 2681 2682 properties = self._parse_properties() 2683 expressions = properties.expressions if properties else None 2684 partition = self._parse_partition() 2685 return self.expression( 2686 exp.Describe, 2687 this=this, 2688 style=style, 2689 kind=kind, 2690 expressions=expressions, 2691 partition=partition, 2692 format=format, 2693 ) 2694 2695 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2696 kind = self._prev.text.upper() 2697 expressions = [] 2698 2699 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2700 if self._match(TokenType.WHEN): 2701 expression = self._parse_disjunction() 2702 self._match(TokenType.THEN) 2703 else: 2704 expression = None 2705 2706 else_ = self._match(TokenType.ELSE) 2707 2708 if not self._match(TokenType.INTO): 2709 return None 2710 2711 return self.expression( 2712 exp.ConditionalInsert, 2713 this=self.expression( 2714 exp.Insert, 2715 this=self._parse_table(schema=True), 2716 
expression=self._parse_derived_table_values(), 2717 ), 2718 expression=expression, 2719 else_=else_, 2720 ) 2721 2722 expression = parse_conditional_insert() 2723 while expression is not None: 2724 expressions.append(expression) 2725 expression = parse_conditional_insert() 2726 2727 return self.expression( 2728 exp.MultitableInserts, 2729 kind=kind, 2730 comments=comments, 2731 expressions=expressions, 2732 source=self._parse_table(), 2733 ) 2734 2735 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2736 comments = [] 2737 hint = self._parse_hint() 2738 overwrite = self._match(TokenType.OVERWRITE) 2739 ignore = self._match(TokenType.IGNORE) 2740 local = self._match_text_seq("LOCAL") 2741 alternative = None 2742 is_function = None 2743 2744 if self._match_text_seq("DIRECTORY"): 2745 this: t.Optional[exp.Expression] = self.expression( 2746 exp.Directory, 2747 this=self._parse_var_or_string(), 2748 local=local, 2749 row_format=self._parse_row_format(match_row=True), 2750 ) 2751 else: 2752 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2753 comments += ensure_list(self._prev_comments) 2754 return self._parse_multitable_inserts(comments) 2755 2756 if self._match(TokenType.OR): 2757 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2758 2759 self._match(TokenType.INTO) 2760 comments += ensure_list(self._prev_comments) 2761 self._match(TokenType.TABLE) 2762 is_function = self._match(TokenType.FUNCTION) 2763 2764 this = ( 2765 self._parse_table(schema=True, parse_partition=True) 2766 if not is_function 2767 else self._parse_function() 2768 ) 2769 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2770 this.set("alias", self._parse_table_alias()) 2771 2772 returning = self._parse_returning() 2773 2774 return self.expression( 2775 exp.Insert, 2776 comments=comments, 2777 hint=hint, 2778 is_function=is_function, 2779 this=this, 2780 stored=self._match_text_seq("STORED") and self._parse_stored(), 2781 by_name=self._match_text_seq("BY", "NAME"), 2782 exists=self._parse_exists(), 2783 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2784 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2785 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2786 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2787 conflict=self._parse_on_conflict(), 2788 returning=returning or self._parse_returning(), 2789 overwrite=overwrite, 2790 alternative=alternative, 2791 ignore=ignore, 2792 source=self._match(TokenType.TABLE) and self._parse_table(), 2793 ) 2794 2795 def _parse_kill(self) -> exp.Kill: 2796 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2797 2798 return self.expression( 2799 exp.Kill, 2800 this=self._parse_primary(), 2801 kind=kind, 2802 ) 2803 2804 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2805 conflict = self._match_text_seq("ON", "CONFLICT") 2806 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2807 2808 if not conflict and not duplicate: 2809 return None 2810 2811 conflict_keys = None 2812 constraint = None 2813 2814 if conflict: 2815 if self._match_text_seq("ON", "CONSTRAINT"): 2816 constraint = self._parse_id_var() 2817 elif self._match(TokenType.L_PAREN): 2818 conflict_keys = self._parse_csv(self._parse_id_var) 2819 self._match_r_paren() 2820 2821 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2822 if 
self._prev.token_type == TokenType.UPDATE: 2823 self._match(TokenType.SET) 2824 expressions = self._parse_csv(self._parse_equality) 2825 else: 2826 expressions = None 2827 2828 return self.expression( 2829 exp.OnConflict, 2830 duplicate=duplicate, 2831 expressions=expressions, 2832 action=action, 2833 conflict_keys=conflict_keys, 2834 constraint=constraint, 2835 where=self._parse_where(), 2836 ) 2837 2838 def _parse_returning(self) -> t.Optional[exp.Returning]: 2839 if not self._match(TokenType.RETURNING): 2840 return None 2841 return self.expression( 2842 exp.Returning, 2843 expressions=self._parse_csv(self._parse_expression), 2844 into=self._match(TokenType.INTO) and self._parse_table_part(), 2845 ) 2846 2847 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2848 if not self._match(TokenType.FORMAT): 2849 return None 2850 return self._parse_row_format() 2851 2852 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2853 index = self._index 2854 with_ = with_ or self._match_text_seq("WITH") 2855 2856 if not self._match(TokenType.SERDE_PROPERTIES): 2857 self._retreat(index) 2858 return None 2859 return self.expression( 2860 exp.SerdeProperties, 2861 **{ # type: ignore 2862 "expressions": self._parse_wrapped_properties(), 2863 "with": with_, 2864 }, 2865 ) 2866 2867 def _parse_row_format( 2868 self, match_row: bool = False 2869 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2870 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2871 return None 2872 2873 if self._match_text_seq("SERDE"): 2874 this = self._parse_string() 2875 2876 serde_properties = self._parse_serde_properties() 2877 2878 return self.expression( 2879 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2880 ) 2881 2882 self._match_text_seq("DELIMITED") 2883 2884 kwargs = {} 2885 2886 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2887 kwargs["fields"] = self._parse_string() 2888 if self._match_text_seq("ESCAPED", "BY"): 2889 kwargs["escaped"] = self._parse_string() 2890 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2891 kwargs["collection_items"] = self._parse_string() 2892 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2893 kwargs["map_keys"] = self._parse_string() 2894 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2895 kwargs["lines"] = self._parse_string() 2896 if self._match_text_seq("NULL", "DEFINED", "AS"): 2897 kwargs["null"] = self._parse_string() 2898 2899 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2900 2901 def _parse_load(self) -> exp.LoadData | exp.Command: 2902 if self._match_text_seq("DATA"): 2903 local = self._match_text_seq("LOCAL") 2904 self._match_text_seq("INPATH") 2905 inpath = self._parse_string() 2906 overwrite = self._match(TokenType.OVERWRITE) 2907 self._match_pair(TokenType.INTO, TokenType.TABLE) 2908 2909 return self.expression( 2910 exp.LoadData, 2911 this=self._parse_table(schema=True), 2912 local=local, 2913 overwrite=overwrite, 2914 inpath=inpath, 2915 partition=self._parse_partition(), 2916 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2917 serde=self._match_text_seq("SERDE") and self._parse_string(), 2918 ) 2919 return self._parse_as_command(self._prev) 2920 2921 def _parse_delete(self) -> exp.Delete: 2922 # This handles MySQL's "Multiple-Table Syntax" 2923 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2924 tables 
= None 2925 if not self._match(TokenType.FROM, advance=False): 2926 tables = self._parse_csv(self._parse_table) or None 2927 2928 returning = self._parse_returning() 2929 2930 return self.expression( 2931 exp.Delete, 2932 tables=tables, 2933 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2934 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2935 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2936 where=self._parse_where(), 2937 returning=returning or self._parse_returning(), 2938 limit=self._parse_limit(), 2939 ) 2940 2941 def _parse_update(self) -> exp.Update: 2942 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2943 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2944 returning = self._parse_returning() 2945 return self.expression( 2946 exp.Update, 2947 **{ # type: ignore 2948 "this": this, 2949 "expressions": expressions, 2950 "from": self._parse_from(joins=True), 2951 "where": self._parse_where(), 2952 "returning": returning or self._parse_returning(), 2953 "order": self._parse_order(), 2954 "limit": self._parse_limit(), 2955 }, 2956 ) 2957 2958 def _parse_uncache(self) -> exp.Uncache: 2959 if not self._match(TokenType.TABLE): 2960 self.raise_error("Expecting TABLE after UNCACHE") 2961 2962 return self.expression( 2963 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2964 ) 2965 2966 def _parse_cache(self) -> exp.Cache: 2967 lazy = self._match_text_seq("LAZY") 2968 self._match(TokenType.TABLE) 2969 table = self._parse_table(schema=True) 2970 2971 options = [] 2972 if self._match_text_seq("OPTIONS"): 2973 self._match_l_paren() 2974 k = self._parse_string() 2975 self._match(TokenType.EQ) 2976 v = self._parse_string() 2977 options = [k, v] 2978 self._match_r_paren() 2979 2980 self._match(TokenType.ALIAS) 2981 return self.expression( 2982 exp.Cache, 2983 this=table, 2984 lazy=lazy, 2985 options=options, 2986 expression=self._parse_select(nested=True), 2987 ) 2988 2989 def _parse_partition(self) -> t.Optional[exp.Partition]: 2990 if not self._match_texts(self.PARTITION_KEYWORDS): 2991 return None 2992 2993 return self.expression( 2994 exp.Partition, 2995 subpartition=self._prev.text.upper() == "SUBPARTITION", 2996 expressions=self._parse_wrapped_csv(self._parse_assignment), 2997 ) 2998 2999 def _parse_value(self) -> t.Optional[exp.Tuple]: 3000 def _parse_value_expression() -> t.Optional[exp.Expression]: 3001 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3002 return exp.var(self._prev.text.upper()) 3003 return self._parse_expression() 3004 3005 if self._match(TokenType.L_PAREN): 3006 expressions = self._parse_csv(_parse_value_expression) 3007 self._match_r_paren() 3008 return self.expression(exp.Tuple, expressions=expressions) 3009 3010 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3011 expression = self._parse_expression() 3012 if expression: 3013 return self.expression(exp.Tuple, expressions=[expression]) 3014 return None 3015 3016 def _parse_projections(self) -> t.List[exp.Expression]: 3017 return self._parse_expressions() 3018 3019 def _parse_select( 3020 self, 3021 nested: bool = False, 3022 table: bool = False, 3023 parse_subquery_alias: bool = True, 3024 parse_set_operation: bool = True, 3025 ) -> t.Optional[exp.Expression]: 3026 cte = self._parse_with() 3027 3028 if cte: 3029 this = self._parse_statement() 3030 3031 if not this: 3032 self.raise_error("Failed to parse any statement following CTE") 3033 return cte 3034 3035 if "with" in this.arg_types: 3036 this.set("with", cte) 3037 else: 3038 self.raise_error(f"{this.key} does not support CTE") 3039 this = cte 3040 3041 return this 3042 3043 # duckdb supports leading with FROM x 3044 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3045 3046 if self._match(TokenType.SELECT): 3047 comments = self._prev_comments 3048 3049 hint = self._parse_hint() 3050 3051 if self._next and not self._next.token_type == TokenType.DOT: 3052 all_ = self._match(TokenType.ALL) 3053 distinct = self._match_set(self.DISTINCT_TOKENS) 3054 else: 3055 all_, distinct = None, None 3056 3057 kind = ( 3058 self._match(TokenType.ALIAS) 3059 and self._match_texts(("STRUCT", "VALUE")) 3060 and self._prev.text.upper() 3061 ) 3062 3063 if distinct: 3064 distinct = self.expression( 3065 exp.Distinct, 3066 on=self._parse_value() if self._match(TokenType.ON) else None, 3067 ) 3068 3069 if all_ and distinct: 3070 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3071 3072 operation_modifiers = [] 3073 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3074 operation_modifiers.append(exp.var(self._prev.text.upper())) 3075 3076 limit = self._parse_limit(top=True) 3077 projections = self._parse_projections() 3078 3079 this = self.expression( 3080 exp.Select, 3081 kind=kind, 3082 hint=hint, 3083 distinct=distinct, 3084 expressions=projections, 3085 limit=limit, 3086 operation_modifiers=operation_modifiers or None, 3087 ) 3088 this.comments = comments 3089 3090 into = self._parse_into() 3091 if into: 3092 this.set("into", into) 3093 3094 if not from_: 3095 from_ = self._parse_from() 3096 3097 if from_: 3098 this.set("from", from_) 3099 3100 this = self._parse_query_modifiers(this) 3101 elif (table or nested) and self._match(TokenType.L_PAREN): 3102 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3103 this = self._parse_simplified_pivot( 3104 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3105 ) 3106 elif self._match(TokenType.FROM): 3107 from_ = self._parse_from(skip_from_token=True) 3108 # Support parentheses for duckdb FROM-first syntax 3109 select = self._parse_select() 3110 if select: 3111 select.set("from", from_) 3112 this = select 3113 else: 3114 this = exp.select("*").from_(t.cast(exp.From, from_)) 3115 else: 3116 this = ( 3117 self._parse_table() 3118 if table 3119 else self._parse_select(nested=True, parse_set_operation=False) 3120 ) 3121 3122 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3123 # in case a modifier (e.g. 
join) is following 3124 if table and isinstance(this, exp.Values) and this.alias: 3125 alias = this.args["alias"].pop() 3126 this = exp.Table(this=this, alias=alias) 3127 3128 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3129 3130 self._match_r_paren() 3131 3132 # We return early here so that the UNION isn't attached to the subquery by the 3133 # following call to _parse_set_operations, but instead becomes the parent node 3134 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3135 elif self._match(TokenType.VALUES, advance=False): 3136 this = self._parse_derived_table_values() 3137 elif from_: 3138 this = exp.select("*").from_(from_.this, copy=False) 3139 elif self._match(TokenType.SUMMARIZE): 3140 table = self._match(TokenType.TABLE) 3141 this = self._parse_select() or self._parse_string() or self._parse_table() 3142 return self.expression(exp.Summarize, this=this, table=table) 3143 elif self._match(TokenType.DESCRIBE): 3144 this = self._parse_describe() 3145 elif self._match_text_seq("STREAM"): 3146 this = self._parse_function() 3147 if this: 3148 this = self.expression(exp.Stream, this=this) 3149 else: 3150 self._retreat(self._index - 1) 3151 else: 3152 this = None 3153 3154 return self._parse_set_operations(this) if parse_set_operation else this 3155 3156 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3157 if not skip_with_token and not self._match(TokenType.WITH): 3158 return None 3159 3160 comments = self._prev_comments 3161 recursive = self._match(TokenType.RECURSIVE) 3162 3163 last_comments = None 3164 expressions = [] 3165 while True: 3166 expressions.append(self._parse_cte()) 3167 if last_comments: 3168 expressions[-1].add_comments(last_comments) 3169 3170 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3171 break 3172 else: 3173 self._match(TokenType.WITH) 3174 3175 last_comments = self._prev_comments 3176 3177 return self.expression( 3178 exp.With, comments=comments, expressions=expressions, recursive=recursive 3179 ) 3180 3181 def _parse_cte(self) -> t.Optional[exp.CTE]: 3182 index = self._index 3183 3184 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3185 if not alias or not alias.this: 3186 self.raise_error("Expected CTE to have alias") 3187 3188 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3189 self._retreat(index) 3190 return None 3191 3192 comments = self._prev_comments 3193 3194 if self._match_text_seq("NOT", "MATERIALIZED"): 3195 materialized = False 3196 elif self._match_text_seq("MATERIALIZED"): 3197 materialized = True 3198 else: 3199 materialized = None 3200 3201 return self.expression( 3202 exp.CTE, 3203 this=self._parse_wrapped(self._parse_statement), 3204 alias=alias, 3205 materialized=materialized, 3206 comments=comments, 3207 ) 3208 3209 def _parse_table_alias( 3210 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3211 ) -> t.Optional[exp.TableAlias]: 3212 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3213 # so this section tries to parse the clause version and if it fails, it treats the token 3214 # as an identifier (alias) 3215 if self._can_parse_limit_or_offset(): 3216 return None 3217 3218 any_token = self._match(TokenType.ALIAS) 3219 alias = ( 3220 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3221 or self._parse_string_as_identifier() 3222 ) 3223 3224 index = self._index 3225 if self._match(TokenType.L_PAREN): 3226 columns = 
self._parse_csv(self._parse_function_parameter) 3227 self._match_r_paren() if columns else self._retreat(index) 3228 else: 3229 columns = None 3230 3231 if not alias and not columns: 3232 return None 3233 3234 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3235 3236 # We bubble up comments from the Identifier to the TableAlias 3237 if isinstance(alias, exp.Identifier): 3238 table_alias.add_comments(alias.pop_comments()) 3239 3240 return table_alias 3241 3242 def _parse_subquery( 3243 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3244 ) -> t.Optional[exp.Subquery]: 3245 if not this: 3246 return None 3247 3248 return self.expression( 3249 exp.Subquery, 3250 this=this, 3251 pivots=self._parse_pivots(), 3252 alias=self._parse_table_alias() if parse_alias else None, 3253 sample=self._parse_table_sample(), 3254 ) 3255 3256 def _implicit_unnests_to_explicit(self, this: E) -> E: 3257 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3258 3259 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3260 for i, join in enumerate(this.args.get("joins") or []): 3261 table = join.this 3262 normalized_table = table.copy() 3263 normalized_table.meta["maybe_column"] = True 3264 normalized_table = _norm(normalized_table, dialect=self.dialect) 3265 3266 if isinstance(table, exp.Table) and not join.args.get("on"): 3267 if normalized_table.parts[0].name in refs: 3268 table_as_column = table.to_column() 3269 unnest = exp.Unnest(expressions=[table_as_column]) 3270 3271 # Table.to_column creates a parent Alias node that we want to convert to 3272 # a TableAlias and attach to the Unnest, so it matches the parser's output 3273 if isinstance(table.args.get("alias"), exp.TableAlias): 3274 table_as_column.replace(table_as_column.this) 3275 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3276 3277 table.replace(unnest) 3278 3279 refs.add(normalized_table.alias_or_name) 3280 3281 return this 3282 3283 def _parse_query_modifiers( 3284 self, this: t.Optional[exp.Expression] 3285 ) -> t.Optional[exp.Expression]: 3286 if isinstance(this, (exp.Query, exp.Table)): 3287 for join in self._parse_joins(): 3288 this.append("joins", join) 3289 for lateral in iter(self._parse_lateral, None): 3290 this.append("laterals", lateral) 3291 3292 while True: 3293 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3294 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3295 key, expression = parser(self) 3296 3297 if expression: 3298 this.set(key, expression) 3299 if key == "limit": 3300 offset = expression.args.pop("offset", None) 3301 3302 if offset: 3303 offset = exp.Offset(expression=offset) 3304 this.set("offset", offset) 3305 3306 limit_by_expressions = expression.expressions 3307 expression.set("expressions", None) 3308 offset.set("expressions", limit_by_expressions) 3309 continue 3310 break 3311 3312 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3313 this = self._implicit_unnests_to_explicit(this) 3314 3315 return this 3316 3317 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3318 start = self._curr 3319 while self._curr: 3320 self._advance() 3321 3322 end = self._tokens[self._index - 1] 3323 return exp.Hint(expressions=[self._find_sql(start, end)]) 3324 3325 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3326 return self._parse_function_call() 3327 3328 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3329 start_index = 
self._index 3330 should_fallback_to_string = False 3331 3332 hints = [] 3333 try: 3334 for hint in iter( 3335 lambda: self._parse_csv( 3336 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3337 ), 3338 [], 3339 ): 3340 hints.extend(hint) 3341 except ParseError: 3342 should_fallback_to_string = True 3343 3344 if should_fallback_to_string or self._curr: 3345 self._retreat(start_index) 3346 return self._parse_hint_fallback_to_string() 3347 3348 return self.expression(exp.Hint, expressions=hints) 3349 3350 def _parse_hint(self) -> t.Optional[exp.Hint]: 3351 if self._match(TokenType.HINT) and self._prev_comments: 3352 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3353 3354 return None 3355 3356 def _parse_into(self) -> t.Optional[exp.Into]: 3357 if not self._match(TokenType.INTO): 3358 return None 3359 3360 temp = self._match(TokenType.TEMPORARY) 3361 unlogged = self._match_text_seq("UNLOGGED") 3362 self._match(TokenType.TABLE) 3363 3364 return self.expression( 3365 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3366 ) 3367 3368 def _parse_from( 3369 self, joins: bool = False, skip_from_token: bool = False 3370 ) -> t.Optional[exp.From]: 3371 if not skip_from_token and not self._match(TokenType.FROM): 3372 return None 3373 3374 return self.expression( 3375 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3376 ) 3377 3378 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3379 return self.expression( 3380 exp.MatchRecognizeMeasure, 3381 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3382 this=self._parse_expression(), 3383 ) 3384 3385 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3386 if not self._match(TokenType.MATCH_RECOGNIZE): 3387 return None 3388 3389 self._match_l_paren() 3390 3391 partition = self._parse_partition_by() 3392 order = self._parse_order() 3393 3394 measures = ( 3395 self._parse_csv(self._parse_match_recognize_measure) 3396 if self._match_text_seq("MEASURES") 3397 else None 3398 ) 3399 3400 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3401 rows = exp.var("ONE ROW PER MATCH") 3402 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3403 text = "ALL ROWS PER MATCH" 3404 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3405 text += " SHOW EMPTY MATCHES" 3406 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3407 text += " OMIT EMPTY MATCHES" 3408 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3409 text += " WITH UNMATCHED ROWS" 3410 rows = exp.var(text) 3411 else: 3412 rows = None 3413 3414 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3415 text = "AFTER MATCH SKIP" 3416 if self._match_text_seq("PAST", "LAST", "ROW"): 3417 text += " PAST LAST ROW" 3418 elif self._match_text_seq("TO", "NEXT", "ROW"): 3419 text += " TO NEXT ROW" 3420 elif self._match_text_seq("TO", "FIRST"): 3421 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3422 elif self._match_text_seq("TO", "LAST"): 3423 text += f" TO LAST {self._advance_any().text}" # type: ignore 3424 after = exp.var(text) 3425 else: 3426 after = None 3427 3428 if self._match_text_seq("PATTERN"): 3429 self._match_l_paren() 3430 3431 if not self._curr: 3432 self.raise_error("Expecting )", self._curr) 3433 3434 paren = 1 3435 start = self._curr 3436 3437 while self._curr and paren > 0: 3438 if self._curr.token_type == TokenType.L_PAREN: 3439 paren += 1 3440 if self._curr.token_type == 
TokenType.R_PAREN: 3441 paren -= 1 3442 3443 end = self._prev 3444 self._advance() 3445 3446 if paren > 0: 3447 self.raise_error("Expecting )", self._curr) 3448 3449 pattern = exp.var(self._find_sql(start, end)) 3450 else: 3451 pattern = None 3452 3453 define = ( 3454 self._parse_csv(self._parse_name_as_expression) 3455 if self._match_text_seq("DEFINE") 3456 else None 3457 ) 3458 3459 self._match_r_paren() 3460 3461 return self.expression( 3462 exp.MatchRecognize, 3463 partition_by=partition, 3464 order=order, 3465 measures=measures, 3466 rows=rows, 3467 after=after, 3468 pattern=pattern, 3469 define=define, 3470 alias=self._parse_table_alias(), 3471 ) 3472 3473 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3474 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3475 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3476 cross_apply = False 3477 3478 if cross_apply is not None: 3479 this = self._parse_select(table=True) 3480 view = None 3481 outer = None 3482 elif self._match(TokenType.LATERAL): 3483 this = self._parse_select(table=True) 3484 view = self._match(TokenType.VIEW) 3485 outer = self._match(TokenType.OUTER) 3486 else: 3487 return None 3488 3489 if not this: 3490 this = ( 3491 self._parse_unnest() 3492 or self._parse_function() 3493 or self._parse_id_var(any_token=False) 3494 ) 3495 3496 while self._match(TokenType.DOT): 3497 this = exp.Dot( 3498 this=this, 3499 expression=self._parse_function() or self._parse_id_var(any_token=False), 3500 ) 3501 3502 if view: 3503 table = self._parse_id_var(any_token=False) 3504 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3505 table_alias: t.Optional[exp.TableAlias] = self.expression( 3506 exp.TableAlias, this=table, columns=columns 3507 ) 3508 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3509 # We move the alias from the lateral's child node to the lateral itself 3510 table_alias = this.args["alias"].pop() 3511 else: 3512 table_alias = self._parse_table_alias() 3513 3514 return self.expression( 3515 exp.Lateral, 3516 this=this, 3517 view=view, 3518 outer=outer, 3519 alias=table_alias, 3520 cross_apply=cross_apply, 3521 ) 3522 3523 def _parse_join_parts( 3524 self, 3525 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3526 return ( 3527 self._match_set(self.JOIN_METHODS) and self._prev, 3528 self._match_set(self.JOIN_SIDES) and self._prev, 3529 self._match_set(self.JOIN_KINDS) and self._prev, 3530 ) 3531 3532 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3533 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3534 this = self._parse_column() 3535 if isinstance(this, exp.Column): 3536 return this.this 3537 return this 3538 3539 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3540 3541 def _parse_join( 3542 self, skip_join_token: bool = False, parse_bracket: bool = False 3543 ) -> t.Optional[exp.Join]: 3544 if self._match(TokenType.COMMA): 3545 return self.expression(exp.Join, this=self._parse_table()) 3546 3547 index = self._index 3548 method, side, kind = self._parse_join_parts() 3549 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3550 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3551 3552 if not skip_join_token and not join: 3553 self._retreat(index) 3554 kind = None 3555 method = None 3556 side = None 3557 3558 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3559 
cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3560 3561 if not skip_join_token and not join and not outer_apply and not cross_apply: 3562 return None 3563 3564 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3565 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3566 kwargs["expressions"] = self._parse_csv( 3567 lambda: self._parse_table(parse_bracket=parse_bracket) 3568 ) 3569 3570 if method: 3571 kwargs["method"] = method.text 3572 if side: 3573 kwargs["side"] = side.text 3574 if kind: 3575 kwargs["kind"] = kind.text 3576 if hint: 3577 kwargs["hint"] = hint 3578 3579 if self._match(TokenType.MATCH_CONDITION): 3580 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3581 3582 if self._match(TokenType.ON): 3583 kwargs["on"] = self._parse_assignment() 3584 elif self._match(TokenType.USING): 3585 kwargs["using"] = self._parse_using_identifiers() 3586 elif ( 3587 not (outer_apply or cross_apply) 3588 and not isinstance(kwargs["this"], exp.Unnest) 3589 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3590 ): 3591 index = self._index 3592 joins: t.Optional[list] = list(self._parse_joins()) 3593 3594 if joins and self._match(TokenType.ON): 3595 kwargs["on"] = self._parse_assignment() 3596 elif joins and self._match(TokenType.USING): 3597 kwargs["using"] = self._parse_using_identifiers() 3598 else: 3599 joins = None 3600 self._retreat(index) 3601 3602 kwargs["this"].set("joins", joins if joins else None) 3603 3604 comments = [c for token in (method, side, kind) if token for c in token.comments] 3605 return self.expression(exp.Join, comments=comments, **kwargs) 3606 3607 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3608 this = self._parse_assignment() 3609 3610 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3611 return this 3612 3613 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3614 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3615 3616 return this 3617 3618 def _parse_index_params(self) -> exp.IndexParameters: 3619 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3620 3621 if self._match(TokenType.L_PAREN, advance=False): 3622 columns = self._parse_wrapped_csv(self._parse_with_operator) 3623 else: 3624 columns = None 3625 3626 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3627 partition_by = self._parse_partition_by() 3628 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3629 tablespace = ( 3630 self._parse_var(any_token=True) 3631 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3632 else None 3633 ) 3634 where = self._parse_where() 3635 3636 on = self._parse_field() if self._match(TokenType.ON) else None 3637 3638 return self.expression( 3639 exp.IndexParameters, 3640 using=using, 3641 columns=columns, 3642 include=include, 3643 partition_by=partition_by, 3644 where=where, 3645 with_storage=with_storage, 3646 tablespace=tablespace, 3647 on=on, 3648 ) 3649 3650 def _parse_index( 3651 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3652 ) -> t.Optional[exp.Index]: 3653 if index or anonymous: 3654 unique = None 3655 primary = None 3656 amp = None 3657 3658 self._match(TokenType.ON) 3659 self._match(TokenType.TABLE) # hive 3660 table = self._parse_table_parts(schema=True) 3661 else: 3662 unique = self._match(TokenType.UNIQUE) 3663 primary = 
self._match_text_seq("PRIMARY") 3664 amp = self._match_text_seq("AMP") 3665 3666 if not self._match(TokenType.INDEX): 3667 return None 3668 3669 index = self._parse_id_var() 3670 table = None 3671 3672 params = self._parse_index_params() 3673 3674 return self.expression( 3675 exp.Index, 3676 this=index, 3677 table=table, 3678 unique=unique, 3679 primary=primary, 3680 amp=amp, 3681 params=params, 3682 ) 3683 3684 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3685 hints: t.List[exp.Expression] = [] 3686 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3687 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3688 hints.append( 3689 self.expression( 3690 exp.WithTableHint, 3691 expressions=self._parse_csv( 3692 lambda: self._parse_function() or self._parse_var(any_token=True) 3693 ), 3694 ) 3695 ) 3696 self._match_r_paren() 3697 else: 3698 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3699 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3700 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3701 3702 self._match_set((TokenType.INDEX, TokenType.KEY)) 3703 if self._match(TokenType.FOR): 3704 hint.set("target", self._advance_any() and self._prev.text.upper()) 3705 3706 hint.set("expressions", self._parse_wrapped_id_vars()) 3707 hints.append(hint) 3708 3709 return hints or None 3710 3711 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3712 return ( 3713 (not schema and self._parse_function(optional_parens=False)) 3714 or self._parse_id_var(any_token=False) 3715 or self._parse_string_as_identifier() 3716 or self._parse_placeholder() 3717 ) 3718 3719 def _parse_table_parts( 3720 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3721 ) -> exp.Table: 3722 catalog = None 3723 db = None 3724 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3725 3726 while self._match(TokenType.DOT): 3727 if catalog: 3728 # This allows nesting the table in arbitrarily many dot expressions if needed 3729 table = self.expression( 3730 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3731 ) 3732 else: 3733 catalog = db 3734 db = table 3735 # "" used for tsql FROM a..b case 3736 table = self._parse_table_part(schema=schema) or "" 3737 3738 if ( 3739 wildcard 3740 and self._is_connected() 3741 and (isinstance(table, exp.Identifier) or not table) 3742 and self._match(TokenType.STAR) 3743 ): 3744 if isinstance(table, exp.Identifier): 3745 table.args["this"] += "*" 3746 else: 3747 table = exp.Identifier(this="*") 3748 3749 # We bubble up comments from the Identifier to the Table 3750 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3751 3752 if is_db_reference: 3753 catalog = db 3754 db = table 3755 table = None 3756 3757 if not table and not is_db_reference: 3758 self.raise_error(f"Expected table name but got {self._curr}") 3759 if not db and is_db_reference: 3760 self.raise_error(f"Expected database name but got {self._curr}") 3761 3762 table = self.expression( 3763 exp.Table, 3764 comments=comments, 3765 this=table, 3766 db=db, 3767 catalog=catalog, 3768 ) 3769 3770 changes = self._parse_changes() 3771 if changes: 3772 table.set("changes", changes) 3773 3774 at_before = self._parse_historical_data() 3775 if at_before: 3776 table.set("when", at_before) 3777 3778 pivots = self._parse_pivots() 3779 if pivots: 3780 table.set("pivots", pivots) 3781 3782 return table 3783 3784 def 
_parse_table( 3785 self, 3786 schema: bool = False, 3787 joins: bool = False, 3788 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3789 parse_bracket: bool = False, 3790 is_db_reference: bool = False, 3791 parse_partition: bool = False, 3792 ) -> t.Optional[exp.Expression]: 3793 lateral = self._parse_lateral() 3794 if lateral: 3795 return lateral 3796 3797 unnest = self._parse_unnest() 3798 if unnest: 3799 return unnest 3800 3801 values = self._parse_derived_table_values() 3802 if values: 3803 return values 3804 3805 subquery = self._parse_select(table=True) 3806 if subquery: 3807 if not subquery.args.get("pivots"): 3808 subquery.set("pivots", self._parse_pivots()) 3809 return subquery 3810 3811 bracket = parse_bracket and self._parse_bracket(None) 3812 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3813 3814 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3815 self._parse_table 3816 ) 3817 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3818 3819 only = self._match(TokenType.ONLY) 3820 3821 this = t.cast( 3822 exp.Expression, 3823 bracket 3824 or rows_from 3825 or self._parse_bracket( 3826 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3827 ), 3828 ) 3829 3830 if only: 3831 this.set("only", only) 3832 3833 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3834 self._match_text_seq("*") 3835 3836 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3837 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3838 this.set("partition", self._parse_partition()) 3839 3840 if schema: 3841 return self._parse_schema(this=this) 3842 3843 version = self._parse_version() 3844 3845 if version: 3846 this.set("version", version) 3847 3848 if self.dialect.ALIAS_POST_TABLESAMPLE: 3849 this.set("sample", self._parse_table_sample()) 3850 3851 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3852 if alias: 3853 this.set("alias", alias) 3854 3855 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3856 return self.expression( 3857 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3858 ) 3859 3860 this.set("hints", self._parse_table_hints()) 3861 3862 if not this.args.get("pivots"): 3863 this.set("pivots", self._parse_pivots()) 3864 3865 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3866 this.set("sample", self._parse_table_sample()) 3867 3868 if joins: 3869 for join in self._parse_joins(): 3870 this.append("joins", join) 3871 3872 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3873 this.set("ordinality", True) 3874 this.set("alias", self._parse_table_alias()) 3875 3876 return this 3877 3878 def _parse_version(self) -> t.Optional[exp.Version]: 3879 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3880 this = "TIMESTAMP" 3881 elif self._match(TokenType.VERSION_SNAPSHOT): 3882 this = "VERSION" 3883 else: 3884 return None 3885 3886 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3887 kind = self._prev.text.upper() 3888 start = self._parse_bitwise() 3889 self._match_texts(("TO", "AND")) 3890 end = self._parse_bitwise() 3891 expression: t.Optional[exp.Expression] = self.expression( 3892 exp.Tuple, expressions=[start, end] 3893 ) 3894 elif self._match_text_seq("CONTAINED", "IN"): 3895 kind = "CONTAINED IN" 3896 expression = self.expression( 3897 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3898 ) 3899 elif 
self._match(TokenType.ALL): 3900 kind = "ALL" 3901 expression = None 3902 else: 3903 self._match_text_seq("AS", "OF") 3904 kind = "AS OF" 3905 expression = self._parse_type() 3906 3907 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3908 3909 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3910 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3911 index = self._index 3912 historical_data = None 3913 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3914 this = self._prev.text.upper() 3915 kind = ( 3916 self._match(TokenType.L_PAREN) 3917 and self._match_texts(self.HISTORICAL_DATA_KIND) 3918 and self._prev.text.upper() 3919 ) 3920 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3921 3922 if expression: 3923 self._match_r_paren() 3924 historical_data = self.expression( 3925 exp.HistoricalData, this=this, kind=kind, expression=expression 3926 ) 3927 else: 3928 self._retreat(index) 3929 3930 return historical_data 3931 3932 def _parse_changes(self) -> t.Optional[exp.Changes]: 3933 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3934 return None 3935 3936 information = self._parse_var(any_token=True) 3937 self._match_r_paren() 3938 3939 return self.expression( 3940 exp.Changes, 3941 information=information, 3942 at_before=self._parse_historical_data(), 3943 end=self._parse_historical_data(), 3944 ) 3945 3946 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3947 if not self._match(TokenType.UNNEST): 3948 return None 3949 3950 expressions = self._parse_wrapped_csv(self._parse_equality) 3951 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3952 3953 alias = self._parse_table_alias() if with_alias else None 3954 3955 if alias: 3956 if self.dialect.UNNEST_COLUMN_ONLY: 3957 if alias.args.get("columns"): 3958 self.raise_error("Unexpected extra column alias in unnest.") 3959 3960 alias.set("columns", [alias.this]) 3961 alias.set("this", None) 3962 3963 columns = alias.args.get("columns") or [] 3964 if offset and len(expressions) < len(columns): 3965 offset = columns.pop() 3966 3967 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3968 self._match(TokenType.ALIAS) 3969 offset = self._parse_id_var( 3970 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3971 ) or exp.to_identifier("offset") 3972 3973 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3974 3975 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3976 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3977 if not is_derived and not ( 3978 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3979 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3980 ): 3981 return None 3982 3983 expressions = self._parse_csv(self._parse_value) 3984 alias = self._parse_table_alias() 3985 3986 if is_derived: 3987 self._match_r_paren() 3988 3989 return self.expression( 3990 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3991 ) 3992 3993 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3994 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3995 as_modifier and self._match_text_seq("USING", "SAMPLE") 3996 ): 3997 return None 3998 3999 bucket_numerator = None 4000 bucket_denominator = None 4001 bucket_field = None 4002 percent = None 4003 size = None 4004 seed = None 4005 4006 method = 
self._parse_var(tokens=(TokenType.ROW,), upper=True) 4007 matched_l_paren = self._match(TokenType.L_PAREN) 4008 4009 if self.TABLESAMPLE_CSV: 4010 num = None 4011 expressions = self._parse_csv(self._parse_primary) 4012 else: 4013 expressions = None 4014 num = ( 4015 self._parse_factor() 4016 if self._match(TokenType.NUMBER, advance=False) 4017 else self._parse_primary() or self._parse_placeholder() 4018 ) 4019 4020 if self._match_text_seq("BUCKET"): 4021 bucket_numerator = self._parse_number() 4022 self._match_text_seq("OUT", "OF") 4023 bucket_denominator = bucket_denominator = self._parse_number() 4024 self._match(TokenType.ON) 4025 bucket_field = self._parse_field() 4026 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4027 percent = num 4028 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4029 size = num 4030 else: 4031 percent = num 4032 4033 if matched_l_paren: 4034 self._match_r_paren() 4035 4036 if self._match(TokenType.L_PAREN): 4037 method = self._parse_var(upper=True) 4038 seed = self._match(TokenType.COMMA) and self._parse_number() 4039 self._match_r_paren() 4040 elif self._match_texts(("SEED", "REPEATABLE")): 4041 seed = self._parse_wrapped(self._parse_number) 4042 4043 if not method and self.DEFAULT_SAMPLING_METHOD: 4044 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4045 4046 return self.expression( 4047 exp.TableSample, 4048 expressions=expressions, 4049 method=method, 4050 bucket_numerator=bucket_numerator, 4051 bucket_denominator=bucket_denominator, 4052 bucket_field=bucket_field, 4053 percent=percent, 4054 size=size, 4055 seed=seed, 4056 ) 4057 4058 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4059 return list(iter(self._parse_pivot, None)) or None 4060 4061 def _parse_joins(self) -> t.Iterator[exp.Join]: 4062 return iter(self._parse_join, None) 4063 4064 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4065 if not self._match(TokenType.INTO): 4066 return None 4067 4068 return self.expression( 4069 exp.UnpivotColumns, 4070 this=self._match_text_seq("NAME") and self._parse_column(), 4071 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4072 ) 4073 4074 # https://duckdb.org/docs/sql/statements/pivot 4075 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4076 def _parse_on() -> t.Optional[exp.Expression]: 4077 this = self._parse_bitwise() 4078 4079 if self._match(TokenType.IN): 4080 # PIVOT ... ON col IN (row_val1, row_val2) 4081 return self._parse_in(this) 4082 if self._match(TokenType.ALIAS, advance=False): 4083 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4084 return self._parse_alias(this) 4085 4086 return this 4087 4088 this = self._parse_table() 4089 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4090 into = self._parse_unpivot_columns() 4091 using = self._match(TokenType.USING) and self._parse_csv( 4092 lambda: self._parse_alias(self._parse_function()) 4093 ) 4094 group = self._parse_group() 4095 4096 return self.expression( 4097 exp.Pivot, 4098 this=this, 4099 expressions=expressions, 4100 using=using, 4101 group=group, 4102 unpivot=is_unpivot, 4103 into=into, 4104 ) 4105 4106 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4107 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4108 this = self._parse_select_or_expression() 4109 4110 self._match(TokenType.ALIAS) 4111 alias = self._parse_bitwise() 4112 if alias: 4113 if isinstance(alias, exp.Column) and not alias.db: 4114 alias = alias.this 4115 return self.expression(exp.PivotAlias, this=this, alias=alias) 4116 4117 return this 4118 4119 value = self._parse_column() 4120 4121 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4122 self.raise_error("Expecting IN (") 4123 4124 if self._match(TokenType.ANY): 4125 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4126 else: 4127 exprs = self._parse_csv(_parse_aliased_expression) 4128 4129 self._match_r_paren() 4130 return self.expression(exp.In, this=value, expressions=exprs) 4131 4132 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4133 index = self._index 4134 include_nulls = None 4135 4136 if self._match(TokenType.PIVOT): 4137 unpivot = False 4138 elif self._match(TokenType.UNPIVOT): 4139 unpivot = True 4140 4141 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4142 if self._match_text_seq("INCLUDE", "NULLS"): 4143 include_nulls = True 4144 elif self._match_text_seq("EXCLUDE", "NULLS"): 4145 include_nulls = False 4146 else: 4147 return None 4148 4149 expressions = [] 4150 4151 if not self._match(TokenType.L_PAREN): 4152 self._retreat(index) 4153 return None 4154 4155 if unpivot: 4156 expressions = self._parse_csv(self._parse_column) 4157 else: 4158 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4159 4160 if not expressions: 4161 self.raise_error("Failed to parse PIVOT's aggregation list") 4162 4163 if not self._match(TokenType.FOR): 4164 self.raise_error("Expecting FOR") 4165 4166 field = self._parse_pivot_in() 4167 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4168 self._parse_bitwise 4169 ) 4170 4171 self._match_r_paren() 4172 4173 pivot = self.expression( 4174 exp.Pivot, 4175 expressions=expressions, 4176 field=field, 4177 unpivot=unpivot, 4178 include_nulls=include_nulls, 4179 default_on_null=default_on_null, 4180 ) 4181 4182 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4183 pivot.set("alias", self._parse_table_alias()) 4184 4185 if not unpivot: 4186 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4187 4188 columns: t.List[exp.Expression] = [] 4189 for fld in pivot.args["field"].expressions: 4190 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4191 for name in names: 4192 if self.PREFIXED_PIVOT_COLUMNS: 4193 name = f"{name}_{field_name}" if name else field_name 4194 else: 4195 name = f"{field_name}_{name}" if name else field_name 4196 4197 columns.append(exp.to_identifier(name)) 4198 4199 
pivot.set("columns", columns) 4200 4201 return pivot 4202 4203 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4204 return [agg.alias for agg in aggregations] 4205 4206 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4207 if not skip_where_token and not self._match(TokenType.PREWHERE): 4208 return None 4209 4210 return self.expression( 4211 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4212 ) 4213 4214 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4215 if not skip_where_token and not self._match(TokenType.WHERE): 4216 return None 4217 4218 return self.expression( 4219 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4220 ) 4221 4222 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4223 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4224 return None 4225 4226 elements: t.Dict[str, t.Any] = defaultdict(list) 4227 4228 if self._match(TokenType.ALL): 4229 elements["all"] = True 4230 elif self._match(TokenType.DISTINCT): 4231 elements["all"] = False 4232 4233 while True: 4234 index = self._index 4235 4236 elements["expressions"].extend( 4237 self._parse_csv( 4238 lambda: None 4239 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4240 else self._parse_assignment() 4241 ) 4242 ) 4243 4244 before_with_index = self._index 4245 with_prefix = self._match(TokenType.WITH) 4246 4247 if self._match(TokenType.ROLLUP): 4248 elements["rollup"].append( 4249 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4250 ) 4251 elif self._match(TokenType.CUBE): 4252 elements["cube"].append( 4253 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4254 ) 4255 elif self._match(TokenType.GROUPING_SETS): 4256 elements["grouping_sets"].append( 4257 self.expression( 4258 exp.GroupingSets, 4259 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4260 ) 4261 ) 4262 elif self._match_text_seq("TOTALS"): 4263 elements["totals"] = True # type: ignore 4264 4265 if before_with_index <= self._index <= before_with_index + 1: 4266 self._retreat(before_with_index) 4267 break 4268 4269 if index == self._index: 4270 break 4271 4272 return self.expression(exp.Group, **elements) # type: ignore 4273 4274 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4275 return self.expression( 4276 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4277 ) 4278 4279 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4280 if self._match(TokenType.L_PAREN): 4281 grouping_set = self._parse_csv(self._parse_column) 4282 self._match_r_paren() 4283 return self.expression(exp.Tuple, expressions=grouping_set) 4284 4285 return self._parse_column() 4286 4287 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4288 if not skip_having_token and not self._match(TokenType.HAVING): 4289 return None 4290 return self.expression(exp.Having, this=self._parse_assignment()) 4291 4292 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4293 if not self._match(TokenType.QUALIFY): 4294 return None 4295 return self.expression(exp.Qualify, this=self._parse_assignment()) 4296 4297 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4298 if skip_start_token: 4299 start = None 4300 elif self._match(TokenType.START_WITH): 4301 start = self._parse_assignment() 4302 else: 4303 

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
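
    # Example: _parse_ordered infers nulls_first from the dialect's NULL_ORDERING when a
    # query doesn't spell out NULLS FIRST/LAST, which is what lets transpilation make the
    # ordering explicit (hedged; exact output may differ between sqlglot versions):
    #
    #   import sqlglot
    #
    #   sqlglot.transpile("SELECT x FROM t ORDER BY x", read="duckdb", write="spark")
    #   # -> something like ['SELECT x FROM t ORDER BY x NULLS LAST'], since DuckDB sorts
    #   # NULLs last by default while Spark sorts them first for ascending keys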

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
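
    # Example: TOP, LIMIT and FETCH all land in the exp.Limit / exp.Fetch shapes built
    # above, so row-limiting syntax can be rewritten across dialects (hedged sketch):
    #
    #   import sqlglot
    #
    #   sqlglot.transpile("SELECT TOP 5 x FROM t", read="tsql", write="mysql")
    #   # -> something like ['SELECT x FROM t LIMIT 5']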

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
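
    # Note: the chain _parse_assignment -> _parse_disjunction -> _parse_conjunction ->
    # _parse_equality -> _parse_comparison -> _parse_range is a classic recursive-descent
    # precedence ladder: each level parses the next-tighter level and then folds its own
    # operators left-associatively, so AND binds tighter than OR:
    #
    #   import sqlglot
    #
    #   sqlglot.parse_one("SELECT a OR b AND c").expressions[0]
    #   # parses as a OR (b AND c), i.e. exp.Or(this=a, expression=exp.And(...))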

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
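
    # Example: IS [NOT] DISTINCT FROM becomes the null-safe comparison nodes built in
    # _parse_is, letting dialects with different spellings interoperate (hedged sketch):
    #
    #   import sqlglot
    #
    #   sqlglot.transpile("SELECT a IS NOT DISTINCT FROM b", read="postgres", write="mysql")
    #   # -> something like ['SELECT a <=> b'], MySQL's null-safe equality operator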

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns;
                # otherwise, fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this
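
    # Example: the canonical INTERVAL '<value>' <UNIT> form built above is what makes
    # intervals portable; a unit embedded in the string literal is split out (hedged):
    #
    #   import sqlglot
    #
    #   sqlglot.transpile("SELECT INTERVAL '5 day'", read="postgres", write="duckdb")
    #   # -> something like ["SELECT INTERVAL '5' DAY"]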

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BigQuery's inline constructor <type>(<values>), e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the
                # data type, e.g. in DuckDB ARRAY[1] should retreat and instead be parsed into
                # exp.Array, in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
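
    # Example: _parse_types is what powers exp.DataType.build, so nested and parameterized
    # types normalize into one canonical tree (hedged; reprs vary across versions):
    #
    #   from sqlglot import exp
    #
    #   dt = exp.DataType.build("ARRAY<DECIMAL(38, 0)>", dialect="bigquery")
    #   dt.this            # exp.DataType.Type.ARRAY
    #   dt.expressions[0]  # the DECIMAL(38, 0) DataType with its DataTypeParam args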

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & Snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
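
    # Example: Snowflake's colon extraction parses into exp.JSONExtract with a JSON path
    # (variant_extract=True), which round-trips through GET_PATH (hedged sketch):
    #
    #   import sqlglot
    #
    #   sqlglot.transpile("SELECT col:a.b FROM t", read="snowflake", write="snowflake")
    #   # -> something like ["SELECT GET_PATH(col, 'a.b') FROM t"]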

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )
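
    # Example: the "/* sqlglot.anonymous */" escape hatch described above keeps a known
    # function from being normalized into its canonical expression (hedged; the comment
    # must directly follow the call, and behavior may vary by version):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   node = sqlglot.parse_one("SELECT FOO(a, b)").expressions[0]
    #   isinstance(node, exp.Anonymous)  # True: FOO isn't a known function
    #   # and IFNULL(a, b) /* sqlglot.anonymous */ would likewise stay Anonymous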

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
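
    # Example: higher-order function arguments such as x -> x + 1 go through
    # _parse_lambda and the LAMBDAS token table (hedged sketch):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sqlglot.parse_one("SELECT TRANSFORM(xs, x -> x + 1)", read="spark").find(exp.Lambda)
    #   # -> Lambda(this=x + 1, expressions=[x])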

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
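
    # Example: _parse_schema and _parse_column_def turn a DDL column list into
    # exp.ColumnDef nodes with typed kinds and constraint children:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ddl = sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, name TEXT)")
    #   [col.name for col in ddl.find_all(exp.ColumnDef)]  # ['id', 'name']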

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)
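
    # Example: both ODBC escape literals and DuckDB's struct braces flow through
    # _parse_bracket (hedged; dialect support and AST reprs vary by version):
    #
    #   import sqlglot
    #
    #   sqlglot.parse_one("SELECT {d '2024-01-15'}", read="tsql")  # parsed like DATE '2024-01-15'
    #   sqlglot.parse_one("SELECT {'a': 1}", read="duckdb")        # an exp.Struct literal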
6000 6001 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6002 if self._match(TokenType.COLON): 6003 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6004 return this 6005 6006 def _parse_case(self) -> t.Optional[exp.Expression]: 6007 ifs = [] 6008 default = None 6009 6010 comments = self._prev_comments 6011 expression = self._parse_assignment() 6012 6013 while self._match(TokenType.WHEN): 6014 this = self._parse_assignment() 6015 self._match(TokenType.THEN) 6016 then = self._parse_assignment() 6017 ifs.append(self.expression(exp.If, this=this, true=then)) 6018 6019 if self._match(TokenType.ELSE): 6020 default = self._parse_assignment() 6021 6022 if not self._match(TokenType.END): 6023 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6024 default = exp.column("interval") 6025 else: 6026 self.raise_error("Expected END after CASE", self._prev) 6027 6028 return self.expression( 6029 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6030 ) 6031 6032 def _parse_if(self) -> t.Optional[exp.Expression]: 6033 if self._match(TokenType.L_PAREN): 6034 args = self._parse_csv(self._parse_assignment) 6035 this = self.validate_expression(exp.If.from_arg_list(args), args) 6036 self._match_r_paren() 6037 else: 6038 index = self._index - 1 6039 6040 if self.NO_PAREN_IF_COMMANDS and index == 0: 6041 return self._parse_as_command(self._prev) 6042 6043 condition = self._parse_assignment() 6044 6045 if not condition: 6046 self._retreat(index) 6047 return None 6048 6049 self._match(TokenType.THEN) 6050 true = self._parse_assignment() 6051 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6052 self._match(TokenType.END) 6053 this = self.expression(exp.If, this=condition, true=true, false=false) 6054 6055 return this 6056 6057 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6058 if not self._match_text_seq("VALUE", "FOR"): 6059 self._retreat(self._index - 1) 6060 return None 6061 6062 return self.expression( 6063 exp.NextValueFor, 6064 this=self._parse_column(), 6065 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6066 ) 6067 6068 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6069 this = self._parse_function() or self._parse_var_or_string(upper=True) 6070 6071 if self._match(TokenType.FROM): 6072 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6073 6074 if not self._match(TokenType.COMMA): 6075 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6076 6077 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6078 6079 def _parse_gap_fill(self) -> exp.GapFill: 6080 self._match(TokenType.TABLE) 6081 this = self._parse_table() 6082 6083 self._match(TokenType.COMMA) 6084 args = [this, *self._parse_csv(self._parse_lambda)] 6085 6086 gap_fill = exp.GapFill.from_arg_list(args) 6087 return self.validate_expression(gap_fill, args) 6088 6089 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6090 this = self._parse_assignment() 6091 6092 if not self._match(TokenType.ALIAS): 6093 if self._match(TokenType.COMMA): 6094 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6095 6096 self.raise_error("Expected AS after CAST") 6097 6098 fmt = None 6099 to = self._parse_types() 6100 6101 if self._match(TokenType.FORMAT): 6102 fmt_string = self._parse_string() 6103 fmt = 
self._parse_at_time_zone(fmt_string) 6104 6105 if not to: 6106 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6107 if to.this in exp.DataType.TEMPORAL_TYPES: 6108 this = self.expression( 6109 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6110 this=this, 6111 format=exp.Literal.string( 6112 format_time( 6113 fmt_string.this if fmt_string else "", 6114 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6115 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6116 ) 6117 ), 6118 safe=safe, 6119 ) 6120 6121 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6122 this.set("zone", fmt.args["zone"]) 6123 return this 6124 elif not to: 6125 self.raise_error("Expected TYPE after CAST") 6126 elif isinstance(to, exp.Identifier): 6127 to = exp.DataType.build(to.name, udt=True) 6128 elif to.this == exp.DataType.Type.CHAR: 6129 if self._match(TokenType.CHARACTER_SET): 6130 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6131 6132 return self.expression( 6133 exp.Cast if strict else exp.TryCast, 6134 this=this, 6135 to=to, 6136 format=fmt, 6137 safe=safe, 6138 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6139 ) 6140 6141 def _parse_string_agg(self) -> exp.GroupConcat: 6142 if self._match(TokenType.DISTINCT): 6143 args: t.List[t.Optional[exp.Expression]] = [ 6144 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6145 ] 6146 if self._match(TokenType.COMMA): 6147 args.extend(self._parse_csv(self._parse_assignment)) 6148 else: 6149 args = self._parse_csv(self._parse_assignment) # type: ignore 6150 6151 if self._match_text_seq("ON", "OVERFLOW"): 6152 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6153 if self._match_text_seq("ERROR"): 6154 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6155 else: 6156 self._match_text_seq("TRUNCATE") 6157 on_overflow = self.expression( 6158 exp.OverflowTruncateBehavior, 6159 this=self._parse_string(), 6160 with_count=( 6161 self._match_text_seq("WITH", "COUNT") 6162 or not self._match_text_seq("WITHOUT", "COUNT") 6163 ), 6164 ) 6165 else: 6166 on_overflow = None 6167 6168 index = self._index 6169 if not self._match(TokenType.R_PAREN) and args: 6170 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6171 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6172 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6173 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6174 6175 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6176 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6177 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
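# Illustrative note (added): Postgres STRING_AGG(x, ',' ORDER BY y) and Trino
# LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) both normalize to exp.GroupConcat here,
# which is what makes transpiling either form into e.g. MySQL's
# GROUP_CONCAT(x ORDER BY y SEPARATOR ',') straightforward.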
6178 if not self._match_text_seq("WITHIN", "GROUP"): 6179 self._retreat(index) 6180 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6181 6182 # The corresponding match_r_paren will be called in parse_function (caller) 6183 self._match_l_paren() 6184 6185 return self.expression( 6186 exp.GroupConcat, 6187 this=self._parse_order(this=seq_get(args, 0)), 6188 separator=seq_get(args, 1), 6189 on_overflow=on_overflow, 6190 ) 6191 6192 def _parse_convert( 6193 self, strict: bool, safe: t.Optional[bool] = None 6194 ) -> t.Optional[exp.Expression]: 6195 this = self._parse_bitwise() 6196 6197 if self._match(TokenType.USING): 6198 to: t.Optional[exp.Expression] = self.expression( 6199 exp.CharacterSet, this=self._parse_var() 6200 ) 6201 elif self._match(TokenType.COMMA): 6202 to = self._parse_types() 6203 else: 6204 to = None 6205 6206 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6207 6208 def _parse_xml_table(self) -> exp.XMLTable: 6209 this = self._parse_string() 6210 6211 passing = None 6212 columns = None 6213 6214 if self._match_text_seq("PASSING"): 6215 # The BY VALUE keywords are optional and are provided for semantic clarity 6216 self._match_text_seq("BY", "VALUE") 6217 passing = self._parse_csv(self._parse_column) 6218 6219 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6220 6221 if self._match_text_seq("COLUMNS"): 6222 columns = self._parse_csv(self._parse_field_def) 6223 6224 return self.expression( 6225 exp.XMLTable, this=this, passing=passing, columns=columns, by_ref=by_ref 6226 ) 6227 6228 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6229 """ 6230 There are generally two variants of the DECODE function: 6231 6232 - DECODE(bin, charset) 6233 - DECODE(expression, search, result [, search, result] ... [, default]) 6234 6235 The second variant will always be parsed into a CASE expression. Note that NULL 6236 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6237 instead of relying on pattern matching. 
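For example, `DECODE(x, 1, 'one', 'other')` is parsed as
`CASE WHEN x = 1 THEN 'one' ELSE 'other' END`, and a NULL search such as
`DECODE(x, NULL, 'none')` becomes `CASE WHEN x IS NULL THEN 'none' END`.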
6238 """ 6239 args = self._parse_csv(self._parse_assignment) 6240 6241 if len(args) < 3: 6242 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6243 6244 expression, *expressions = args 6245 if not expression: 6246 return None 6247 6248 ifs = [] 6249 for search, result in zip(expressions[::2], expressions[1::2]): 6250 if not search or not result: 6251 return None 6252 6253 if isinstance(search, exp.Literal): 6254 ifs.append( 6255 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6256 ) 6257 elif isinstance(search, exp.Null): 6258 ifs.append( 6259 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6260 ) 6261 else: 6262 cond = exp.or_( 6263 exp.EQ(this=expression.copy(), expression=search), 6264 exp.and_( 6265 exp.Is(this=expression.copy(), expression=exp.Null()), 6266 exp.Is(this=search.copy(), expression=exp.Null()), 6267 copy=False, 6268 ), 6269 copy=False, 6270 ) 6271 ifs.append(exp.If(this=cond, true=result)) 6272 6273 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6274 6275 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6276 self._match_text_seq("KEY") 6277 key = self._parse_column() 6278 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6279 self._match_text_seq("VALUE") 6280 value = self._parse_bitwise() 6281 6282 if not key and not value: 6283 return None 6284 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6285 6286 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6287 if not this or not self._match_text_seq("FORMAT", "JSON"): 6288 return this 6289 6290 return self.expression(exp.FormatJson, this=this) 6291 6292 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6293 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6294 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6295 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6296 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6297 else: 6298 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6299 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6300 6301 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6302 6303 if not empty and not error and not null: 6304 return None 6305 6306 return self.expression( 6307 exp.OnCondition, 6308 empty=empty, 6309 error=error, 6310 null=null, 6311 ) 6312 6313 def _parse_on_handling( 6314 self, on: str, *values: str 6315 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6316 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6317 for value in values: 6318 if self._match_text_seq(value, "ON", on): 6319 return f"{value} ON {on}" 6320 6321 index = self._index 6322 if self._match(TokenType.DEFAULT): 6323 default_value = self._parse_bitwise() 6324 if self._match_text_seq("ON", on): 6325 return default_value 6326 6327 self._retreat(index) 6328 6329 return None 6330 6331 @t.overload 6332 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6333 6334 @t.overload 6335 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6336 6337 def _parse_json_object(self, agg=False): 6338 star = self._parse_star() 6339 expressions = ( 6340 [star] 6341 if star 6342 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6343 ) 6344 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6345 6346 unique_keys = None 6347 if self._match_text_seq("WITH", "UNIQUE"): 6348 unique_keys = True 6349 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6350 unique_keys = False 6351 6352 self._match_text_seq("KEYS") 6353 6354 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6355 self._parse_type() 6356 ) 6357 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6358 6359 return self.expression( 6360 exp.JSONObjectAgg if agg else exp.JSONObject, 6361 expressions=expressions, 6362 null_handling=null_handling, 6363 unique_keys=unique_keys, 6364 return_type=return_type, 6365 encoding=encoding, 6366 ) 6367 6368 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6369 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6370 if not self._match_text_seq("NESTED"): 6371 this = self._parse_id_var() 6372 kind = self._parse_types(allow_identifiers=False) 6373 nested = None 6374 else: 6375 this = None 6376 kind = None 6377 nested = True 6378 6379 path = self._match_text_seq("PATH") and self._parse_string() 6380 nested_schema = nested and self._parse_json_schema() 6381 6382 return self.expression( 6383 exp.JSONColumnDef, 6384 this=this, 6385 kind=kind, 6386 path=path, 6387 nested_schema=nested_schema, 6388 ) 6389 6390 def _parse_json_schema(self) -> exp.JSONSchema: 6391 self._match_text_seq("COLUMNS") 6392 return self.expression( 6393 exp.JSONSchema, 6394 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6395 ) 6396 6397 def _parse_json_table(self) -> exp.JSONTable: 6398 this = self._parse_format_json(self._parse_bitwise()) 6399 path = self._match(TokenType.COMMA) and self._parse_string() 6400 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6401 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6402 schema = self._parse_json_schema() 6403 6404 return exp.JSONTable( 6405 this=this, 6406 schema=schema, 6407 path=path, 6408 error_handling=error_handling, 6409 empty_handling=empty_handling, 6410 ) 6411 6412 def _parse_match_against(self) -> exp.MatchAgainst: 6413 expressions = self._parse_csv(self._parse_column) 6414 6415 self._match_text_seq(")", "AGAINST", "(") 6416 6417 this = self._parse_string() 6418 6419 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6420 modifier = "IN NATURAL LANGUAGE MODE" 6421 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6422 modifier = f"{modifier} WITH QUERY EXPANSION" 6423 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6424 modifier = "IN BOOLEAN MODE" 6425 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6426 modifier = "WITH QUERY EXPANSION" 6427 else: 6428 modifier = None 6429 6430 return self.expression( 6431 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6432 ) 6433 6434 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6435 def _parse_open_json(self) -> exp.OpenJSON: 6436 this = self._parse_bitwise() 6437 path = self._match(TokenType.COMMA) and self._parse_string() 6438 6439 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6440 this = self._parse_field(any_token=True) 6441 kind = self._parse_types() 6442 path = 
self._parse_string() 6443 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6444 6445 return self.expression( 6446 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6447 ) 6448 6449 expressions = None 6450 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6451 self._match_l_paren() 6452 expressions = self._parse_csv(_parse_open_json_column_def) 6453 6454 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6455 6456 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6457 args = self._parse_csv(self._parse_bitwise) 6458 6459 if self._match(TokenType.IN): 6460 return self.expression( 6461 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6462 ) 6463 6464 if haystack_first: 6465 haystack = seq_get(args, 0) 6466 needle = seq_get(args, 1) 6467 else: 6468 needle = seq_get(args, 0) 6469 haystack = seq_get(args, 1) 6470 6471 return self.expression( 6472 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6473 ) 6474 6475 def _parse_predict(self) -> exp.Predict: 6476 self._match_text_seq("MODEL") 6477 this = self._parse_table() 6478 6479 self._match(TokenType.COMMA) 6480 self._match_text_seq("TABLE") 6481 6482 return self.expression( 6483 exp.Predict, 6484 this=this, 6485 expression=self._parse_table(), 6486 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6487 ) 6488 6489 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6490 args = self._parse_csv(self._parse_table) 6491 return exp.JoinHint(this=func_name.upper(), expressions=args) 6492 6493 def _parse_substring(self) -> exp.Substring: 6494 # Postgres supports the form: substring(string [from int] [for int]) 6495 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6496 6497 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6498 6499 if self._match(TokenType.FROM): 6500 args.append(self._parse_bitwise()) 6501 if self._match(TokenType.FOR): 6502 if len(args) == 1: 6503 args.append(exp.Literal.number(1)) 6504 args.append(self._parse_bitwise()) 6505 6506 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6507 6508 def _parse_trim(self) -> exp.Trim: 6509 # https://www.w3resource.com/sql/character-functions/trim.php 6510 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6511 6512 position = None 6513 collation = None 6514 expression = None 6515 6516 if self._match_texts(self.TRIM_TYPES): 6517 position = self._prev.text.upper() 6518 6519 this = self._parse_bitwise() 6520 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6521 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6522 expression = self._parse_bitwise() 6523 6524 if invert_order: 6525 this, expression = expression, this 6526 6527 if self._match(TokenType.COLLATE): 6528 collation = self._parse_bitwise() 6529 6530 return self.expression( 6531 exp.Trim, this=this, position=position, expression=expression, collation=collation 6532 ) 6533 6534 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6535 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6536 6537 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6538 return self._parse_window(self._parse_id_var(), alias=True) 6539 6540 def _parse_respect_or_ignore_nulls( 6541 self, this: t.Optional[exp.Expression] 6542 ) -> t.Optional[exp.Expression]: 6543 if self._match_text_seq("IGNORE", "NULLS"): 
6544 return self.expression(exp.IgnoreNulls, this=this) 6545 if self._match_text_seq("RESPECT", "NULLS"): 6546 return self.expression(exp.RespectNulls, this=this) 6547 return this 6548 6549 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6550 if self._match(TokenType.HAVING): 6551 self._match_texts(("MAX", "MIN")) 6552 max = self._prev.text.upper() != "MIN" 6553 return self.expression( 6554 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6555 ) 6556 6557 return this 6558 6559 def _parse_window( 6560 self, this: t.Optional[exp.Expression], alias: bool = False 6561 ) -> t.Optional[exp.Expression]: 6562 func = this 6563 comments = func.comments if isinstance(func, exp.Expression) else None 6564 6565 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6566 self._match(TokenType.WHERE) 6567 this = self.expression( 6568 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6569 ) 6570 self._match_r_paren() 6571 6572 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6573 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6574 if self._match_text_seq("WITHIN", "GROUP"): 6575 order = self._parse_wrapped(self._parse_order) 6576 this = self.expression(exp.WithinGroup, this=this, expression=order) 6577 6578 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6579 # Some dialects choose to implement and some do not. 6580 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6581 6582 # There is some code above in _parse_lambda that handles 6583 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6584 6585 # The below changes handle 6586 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6587 6588 # Oracle allows both formats 6589 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6590 # and Snowflake chose to do the same for familiarity 6591 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6592 if isinstance(this, exp.AggFunc): 6593 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6594 6595 if ignore_respect and ignore_respect is not this: 6596 ignore_respect.replace(ignore_respect.this) 6597 this = self.expression(ignore_respect.__class__, this=this) 6598 6599 this = self._parse_respect_or_ignore_nulls(this) 6600 6601 # bigquery select from window x AS (partition by ...) 
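# e.g. SELECT ROW_NUMBER() OVER w FROM t WINDOW w AS (PARTITION BY a ORDER BY b):
# _parse_named_window (defined above) re-enters this method with alias=True to parse
# the "w AS (...)" part (added illustrative note)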
6602 if alias: 6603 over = None 6604 self._match(TokenType.ALIAS) 6605 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6606 return this 6607 else: 6608 over = self._prev.text.upper() 6609 6610 if comments and isinstance(func, exp.Expression): 6611 func.pop_comments() 6612 6613 if not self._match(TokenType.L_PAREN): 6614 return self.expression( 6615 exp.Window, 6616 comments=comments, 6617 this=this, 6618 alias=self._parse_id_var(False), 6619 over=over, 6620 ) 6621 6622 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6623 6624 first = self._match(TokenType.FIRST) 6625 if self._match_text_seq("LAST"): 6626 first = False 6627 6628 partition, order = self._parse_partition_and_order() 6629 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6630 6631 if kind: 6632 self._match(TokenType.BETWEEN) 6633 start = self._parse_window_spec() 6634 self._match(TokenType.AND) 6635 end = self._parse_window_spec() 6636 6637 spec = self.expression( 6638 exp.WindowSpec, 6639 kind=kind, 6640 start=start["value"], 6641 start_side=start["side"], 6642 end=end["value"], 6643 end_side=end["side"], 6644 ) 6645 else: 6646 spec = None 6647 6648 self._match_r_paren() 6649 6650 window = self.expression( 6651 exp.Window, 6652 comments=comments, 6653 this=this, 6654 partition_by=partition, 6655 order=order, 6656 spec=spec, 6657 alias=window_alias, 6658 over=over, 6659 first=first, 6660 ) 6661 6662 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6663 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6664 return self._parse_window(window, alias=alias) 6665 6666 return window 6667 6668 def _parse_partition_and_order( 6669 self, 6670 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6671 return self._parse_partition_by(), self._parse_order() 6672 6673 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6674 self._match(TokenType.BETWEEN) 6675 6676 return { 6677 "value": ( 6678 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6679 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6680 or self._parse_bitwise() 6681 ), 6682 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6683 } 6684 6685 def _parse_alias( 6686 self, this: t.Optional[exp.Expression], explicit: bool = False 6687 ) -> t.Optional[exp.Expression]: 6688 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6689 # so this section tries to parse the clause version and if it fails, it treats the token 6690 # as an identifier (alias) 6691 if self._can_parse_limit_or_offset(): 6692 return this 6693 6694 any_token = self._match(TokenType.ALIAS) 6695 comments = self._prev_comments or [] 6696 6697 if explicit and not any_token: 6698 return this 6699 6700 if self._match(TokenType.L_PAREN): 6701 aliases = self.expression( 6702 exp.Aliases, 6703 comments=comments, 6704 this=this, 6705 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6706 ) 6707 self._match_r_paren(aliases) 6708 return aliases 6709 6710 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6711 self.STRING_ALIASES and self._parse_string_as_identifier() 6712 ) 6713 6714 if alias: 6715 comments.extend(alias.pop_comments()) 6716 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6717 column = this.this 6718 6719 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6720 if not this.comments and column and 
column.comments: 6721 this.comments = column.pop_comments() 6722 6723 return this 6724 6725 def _parse_id_var( 6726 self, 6727 any_token: bool = True, 6728 tokens: t.Optional[t.Collection[TokenType]] = None, 6729 ) -> t.Optional[exp.Expression]: 6730 expression = self._parse_identifier() 6731 if not expression and ( 6732 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6733 ): 6734 quoted = self._prev.token_type == TokenType.STRING 6735 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6736 6737 return expression 6738 6739 def _parse_string(self) -> t.Optional[exp.Expression]: 6740 if self._match_set(self.STRING_PARSERS): 6741 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6742 return self._parse_placeholder() 6743 6744 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6745 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6746 6747 def _parse_number(self) -> t.Optional[exp.Expression]: 6748 if self._match_set(self.NUMERIC_PARSERS): 6749 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6750 return self._parse_placeholder() 6751 6752 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6753 if self._match(TokenType.IDENTIFIER): 6754 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6755 return self._parse_placeholder() 6756 6757 def _parse_var( 6758 self, 6759 any_token: bool = False, 6760 tokens: t.Optional[t.Collection[TokenType]] = None, 6761 upper: bool = False, 6762 ) -> t.Optional[exp.Expression]: 6763 if ( 6764 (any_token and self._advance_any()) 6765 or self._match(TokenType.VAR) 6766 or (self._match_set(tokens) if tokens else False) 6767 ): 6768 return self.expression( 6769 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6770 ) 6771 return self._parse_placeholder() 6772 6773 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6774 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6775 self._advance() 6776 return self._prev 6777 return None 6778 6779 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6780 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6781 6782 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6783 return self._parse_primary() or self._parse_var(any_token=True) 6784 6785 def _parse_null(self) -> t.Optional[exp.Expression]: 6786 if self._match_set(self.NULL_TOKENS): 6787 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6788 return self._parse_placeholder() 6789 6790 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6791 if self._match(TokenType.TRUE): 6792 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6793 if self._match(TokenType.FALSE): 6794 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6795 return self._parse_placeholder() 6796 6797 def _parse_star(self) -> t.Optional[exp.Expression]: 6798 if self._match(TokenType.STAR): 6799 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6800 return self._parse_placeholder() 6801 6802 def _parse_parameter(self) -> exp.Parameter: 6803 this = self._parse_identifier() or self._parse_primary_or_var() 6804 return self.expression(exp.Parameter, this=this) 6805 6806 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6807 if self._match_set(self.PLACEHOLDER_PARSERS): 6808 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6809 if placeholder: 6810 return placeholder 6811 self._advance(-1) 6812 return None 6813 6814 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6815 if not self._match_texts(keywords): 6816 return None 6817 if self._match(TokenType.L_PAREN, advance=False): 6818 return self._parse_wrapped_csv(self._parse_expression) 6819 6820 expression = self._parse_expression() 6821 return [expression] if expression else None 6822 6823 def _parse_csv( 6824 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6825 ) -> t.List[exp.Expression]: 6826 parse_result = parse_method() 6827 items = [parse_result] if parse_result is not None else [] 6828 6829 while self._match(sep): 6830 self._add_comments(parse_result) 6831 parse_result = parse_method() 6832 if parse_result is not None: 6833 items.append(parse_result) 6834 6835 return items 6836 6837 def _parse_tokens( 6838 self, parse_method: t.Callable, expressions: t.Dict 6839 ) -> t.Optional[exp.Expression]: 6840 this = parse_method() 6841 6842 while self._match_set(expressions): 6843 this = self.expression( 6844 expressions[self._prev.token_type], 6845 this=this, 6846 comments=self._prev_comments, 6847 expression=parse_method(), 6848 ) 6849 6850 return this 6851 6852 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6853 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6854 6855 def _parse_wrapped_csv( 6856 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6857 ) -> t.List[exp.Expression]: 6858 return self._parse_wrapped( 6859 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6860 ) 6861 6862 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6863 wrapped = self._match(TokenType.L_PAREN) 6864 if not wrapped and not optional: 6865 self.raise_error("Expecting (") 6866 parse_result = parse_method() 6867 if wrapped: 6868 self._match_r_paren() 6869 return parse_result 6870 6871 def _parse_expressions(self) -> t.List[exp.Expression]: 6872 return self._parse_csv(self._parse_expression) 6873 6874 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6875 return self._parse_select() or self._parse_set_operations( 6876 self._parse_alias(self._parse_assignment(), explicit=True) 6877 if alias 6878 else self._parse_assignment() 6879 ) 6880 6881 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6882 return self._parse_query_modifiers( 6883 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6884 ) 6885 6886 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6887 this = None 6888 if self._match_texts(self.TRANSACTION_KIND): 6889 this = self._prev.text 6890 6891 self._match_texts(("TRANSACTION", "WORK")) 6892 6893 modes = [] 6894 while True: 6895 mode = [] 6896 while self._match(TokenType.VAR): 6897 mode.append(self._prev.text) 6898 6899 if mode: 6900 modes.append(" ".join(mode)) 6901 if not self._match(TokenType.COMMA): 6902 break 6903 6904 return self.expression(exp.Transaction, this=this, modes=modes) 6905 6906 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6907 chain = None 6908 savepoint = None 6909 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6910 6911 self._match_texts(("TRANSACTION", "WORK")) 6912 6913 if self._match_text_seq("TO"): 6914 self._match_text_seq("SAVEPOINT") 6915 savepoint = self._parse_id_var() 
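# Added note on the branch below: COMMIT AND CHAIN yields chain=True, COMMIT AND NO
# CHAIN yields chain=False, and a bare COMMIT leaves chain as None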
6916 6917 if self._match(TokenType.AND): 6918 chain = not self._match_text_seq("NO") 6919 self._match_text_seq("CHAIN") 6920 6921 if is_rollback: 6922 return self.expression(exp.Rollback, savepoint=savepoint) 6923 6924 return self.expression(exp.Commit, chain=chain) 6925 6926 def _parse_refresh(self) -> exp.Refresh: 6927 self._match(TokenType.TABLE) 6928 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6929 6930 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6931 if not self._match_text_seq("ADD"): 6932 return None 6933 6934 self._match(TokenType.COLUMN) 6935 exists_column = self._parse_exists(not_=True) 6936 expression = self._parse_field_def() 6937 6938 if expression: 6939 expression.set("exists", exists_column) 6940 6941 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6942 if self._match_texts(("FIRST", "AFTER")): 6943 position = self._prev.text 6944 column_position = self.expression( 6945 exp.ColumnPosition, this=self._parse_column(), position=position 6946 ) 6947 expression.set("position", column_position) 6948 6949 return expression 6950 6951 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6952 drop = self._match(TokenType.DROP) and self._parse_drop() 6953 if drop and not isinstance(drop, exp.Command): 6954 drop.set("kind", drop.args.get("kind", "COLUMN")) 6955 return drop 6956 6957 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6958 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6959 return self.expression( 6960 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6961 ) 6962 6963 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6964 index = self._index - 1 6965 6966 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6967 return self._parse_csv( 6968 lambda: self.expression( 6969 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6970 ) 6971 ) 6972 6973 self._retreat(index) 6974 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6975 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6976 6977 if self._match_text_seq("ADD", "COLUMNS"): 6978 schema = self._parse_schema() 6979 if schema: 6980 return [schema] 6981 return [] 6982 6983 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6984 6985 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6986 if self._match_texts(self.ALTER_ALTER_PARSERS): 6987 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6988 6989 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6990 # keyword after ALTER we default to parsing this statement 6991 self._match(TokenType.COLUMN) 6992 column = self._parse_field(any_token=True) 6993 6994 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6995 return self.expression(exp.AlterColumn, this=column, drop=True) 6996 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6997 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6998 if self._match(TokenType.COMMENT): 6999 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7000 if self._match_text_seq("DROP", "NOT", "NULL"): 7001 return self.expression( 7002 exp.AlterColumn, 7003 this=column, 7004 drop=True, 7005 allow_null=True, 7006 ) 7007 if self._match_text_seq("SET", "NOT", "NULL"): 7008 return self.expression( 7009 
exp.AlterColumn, 7010 this=column, 7011 allow_null=False, 7012 ) 7013 self._match_text_seq("SET", "DATA") 7014 self._match_text_seq("TYPE") 7015 return self.expression( 7016 exp.AlterColumn, 7017 this=column, 7018 dtype=self._parse_types(), 7019 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7020 using=self._match(TokenType.USING) and self._parse_assignment(), 7021 ) 7022 7023 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7024 if self._match_texts(("ALL", "EVEN", "AUTO")): 7025 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7026 7027 self._match_text_seq("KEY", "DISTKEY") 7028 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7029 7030 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7031 if compound: 7032 self._match_text_seq("SORTKEY") 7033 7034 if self._match(TokenType.L_PAREN, advance=False): 7035 return self.expression( 7036 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7037 ) 7038 7039 self._match_texts(("AUTO", "NONE")) 7040 return self.expression( 7041 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7042 ) 7043 7044 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7045 index = self._index - 1 7046 7047 partition_exists = self._parse_exists() 7048 if self._match(TokenType.PARTITION, advance=False): 7049 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7050 7051 self._retreat(index) 7052 return self._parse_csv(self._parse_drop_column) 7053 7054 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7055 if self._match(TokenType.COLUMN): 7056 exists = self._parse_exists() 7057 old_column = self._parse_column() 7058 to = self._match_text_seq("TO") 7059 new_column = self._parse_column() 7060 7061 if old_column is None or to is None or new_column is None: 7062 return None 7063 7064 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7065 7066 self._match_text_seq("TO") 7067 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7068 7069 def _parse_alter_table_set(self) -> exp.AlterSet: 7070 alter_set = self.expression(exp.AlterSet) 7071 7072 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7073 "TABLE", "PROPERTIES" 7074 ): 7075 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7076 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7077 alter_set.set("expressions", [self._parse_assignment()]) 7078 elif self._match_texts(("LOGGED", "UNLOGGED")): 7079 alter_set.set("option", exp.var(self._prev.text.upper())) 7080 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7081 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7082 elif self._match_text_seq("LOCATION"): 7083 alter_set.set("location", self._parse_field()) 7084 elif self._match_text_seq("ACCESS", "METHOD"): 7085 alter_set.set("access_method", self._parse_field()) 7086 elif self._match_text_seq("TABLESPACE"): 7087 alter_set.set("tablespace", self._parse_field()) 7088 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7089 alter_set.set("file_format", [self._parse_field()]) 7090 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7091 alter_set.set("file_format", self._parse_wrapped_options()) 7092 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7093 alter_set.set("copy_options", 
self._parse_wrapped_options()) 7094 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7095 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7096 else: 7097 if self._match_text_seq("SERDE"): 7098 alter_set.set("serde", self._parse_field()) 7099 7100 alter_set.set("expressions", [self._parse_properties()]) 7101 7102 return alter_set 7103 7104 def _parse_alter(self) -> exp.Alter | exp.Command: 7105 start = self._prev 7106 7107 alter_token = self._match_set(self.ALTERABLES) and self._prev 7108 if not alter_token: 7109 return self._parse_as_command(start) 7110 7111 exists = self._parse_exists() 7112 only = self._match_text_seq("ONLY") 7113 this = self._parse_table(schema=True) 7114 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7115 7116 if self._next: 7117 self._advance() 7118 7119 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7120 if parser: 7121 actions = ensure_list(parser(self)) 7122 not_valid = self._match_text_seq("NOT", "VALID") 7123 options = self._parse_csv(self._parse_property) 7124 7125 if not self._curr and actions: 7126 return self.expression( 7127 exp.Alter, 7128 this=this, 7129 kind=alter_token.text.upper(), 7130 exists=exists, 7131 actions=actions, 7132 only=only, 7133 options=options, 7134 cluster=cluster, 7135 not_valid=not_valid, 7136 ) 7137 7138 return self._parse_as_command(start) 7139 7140 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7141 start = self._prev 7142 # https://duckdb.org/docs/sql/statements/analyze 7143 if not self._curr: 7144 return self.expression(exp.Analyze) 7145 7146 options = [] 7147 while self._match_texts(self.ANALYZE_STYLES): 7148 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7149 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7150 else: 7151 options.append(self._prev.text.upper()) 7152 7153 this: t.Optional[exp.Expression] = None 7154 inner_expression: t.Optional[exp.Expression] = None 7155 7156 kind = self._curr and self._curr.text.upper() 7157 7158 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7159 this = self._parse_table_parts() 7160 elif self._match_text_seq("TABLES"): 7161 if self._match_set((TokenType.FROM, TokenType.IN)): 7162 kind = f"{kind} {self._prev.text.upper()}" 7163 this = self._parse_table(schema=True, is_db_reference=True) 7164 elif self._match_text_seq("DATABASE"): 7165 this = self._parse_table(schema=True, is_db_reference=True) 7166 elif self._match_text_seq("CLUSTER"): 7167 this = self._parse_table() 7168 # Try matching inner expr keywords before fallback to parse table. 
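# (Added illustrative note) A leading keyword such as VALIDATE or LIST is dispatched
# through ANALYZE_EXPRESSION_PARSERS here, while a bare ANALYZE <table> falls through
# to the empty-kind branch below.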
7169 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7170 kind = None 7171 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7172 else: 7173 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7174 kind = None 7175 this = self._parse_table_parts() 7176 7177 partition = self._try_parse(self._parse_partition) 7178 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7179 return self._parse_as_command(start) 7180 7181 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7182 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7183 "WITH", "ASYNC", "MODE" 7184 ): 7185 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7186 else: 7187 mode = None 7188 7189 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7190 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7191 7192 properties = self._parse_properties() 7193 return self.expression( 7194 exp.Analyze, 7195 kind=kind, 7196 this=this, 7197 mode=mode, 7198 partition=partition, 7199 properties=properties, 7200 expression=inner_expression, 7201 options=options, 7202 ) 7203 7204 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7205 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7206 this = None 7207 kind = self._prev.text.upper() 7208 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7209 expressions = [] 7210 7211 if not self._match_text_seq("STATISTICS"): 7212 self.raise_error("Expecting token STATISTICS") 7213 7214 if self._match_text_seq("NOSCAN"): 7215 this = "NOSCAN" 7216 elif self._match(TokenType.FOR): 7217 if self._match_text_seq("ALL", "COLUMNS"): 7218 this = "FOR ALL COLUMNS" 7219 if self._match_texts("COLUMNS"): 7220 this = "FOR COLUMNS" 7221 expressions = self._parse_csv(self._parse_column_reference) 7222 elif self._match_text_seq("SAMPLE"): 7223 sample = self._parse_number() 7224 expressions = [ 7225 self.expression( 7226 exp.AnalyzeSample, 7227 sample=sample, 7228 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7229 ) 7230 ] 7231 7232 return self.expression( 7233 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7234 ) 7235 7236 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7237 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7238 kind = None 7239 this = None 7240 expression: t.Optional[exp.Expression] = None 7241 if self._match_text_seq("REF", "UPDATE"): 7242 kind = "REF" 7243 this = "UPDATE" 7244 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7245 this = "UPDATE SET DANGLING TO NULL" 7246 elif self._match_text_seq("STRUCTURE"): 7247 kind = "STRUCTURE" 7248 if self._match_text_seq("CASCADE", "FAST"): 7249 this = "CASCADE FAST" 7250 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7251 ("ONLINE", "OFFLINE") 7252 ): 7253 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7254 expression = self._parse_into() 7255 7256 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7257 7258 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7259 this = self._prev.text.upper() 7260 if self._match_text_seq("COLUMNS"): 7261 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7262 return None 7263 7264 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7265 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7266 if self._match_text_seq("STATISTICS"): 7267 return self.expression(exp.AnalyzeDelete, kind=kind) 7268 return None 7269 7270 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7271 if self._match_text_seq("CHAINED", "ROWS"): 7272 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7273 return None 7274 7275 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7276 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7277 this = self._prev.text.upper() 7278 expression: t.Optional[exp.Expression] = None 7279 expressions = [] 7280 update_options = None 7281 7282 if self._match_text_seq("HISTOGRAM", "ON"): 7283 expressions = self._parse_csv(self._parse_column_reference) 7284 with_expressions = [] 7285 while self._match(TokenType.WITH): 7286 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7287 if self._match_texts(("SYNC", "ASYNC")): 7288 if self._match_text_seq("MODE", advance=False): 7289 with_expressions.append(f"{self._prev.text.upper()} MODE") 7290 self._advance() 7291 else: 7292 buckets = self._parse_number() 7293 if self._match_text_seq("BUCKETS"): 7294 with_expressions.append(f"{buckets} BUCKETS") 7295 if with_expressions: 7296 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7297 7298 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7299 TokenType.UPDATE, advance=False 7300 ): 7301 update_options = self._prev.text.upper() 7302 self._advance() 7303 elif self._match_text_seq("USING", "DATA"): 7304 expression = self.expression(exp.UsingData, this=self._parse_string()) 7305 7306 return self.expression( 7307 exp.AnalyzeHistogram, 7308 this=this, 7309 expressions=expressions, 7310 expression=expression, 7311 update_options=update_options, 7312 ) 7313 7314 def _parse_merge(self) -> exp.Merge: 7315 self._match(TokenType.INTO) 7316 target = self._parse_table() 7317 7318 if target and self._match(TokenType.ALIAS, advance=False): 7319 target.set("alias", self._parse_table_alias()) 7320 7321 self._match(TokenType.USING) 7322 using = self._parse_table() 7323 7324 self._match(TokenType.ON) 7325 on = self._parse_assignment() 7326 7327 return self.expression( 7328 exp.Merge, 7329 this=target, 7330 using=using, 7331 on=on, 7332 whens=self._parse_when_matched(), 7333 returning=self._parse_returning(), 7334 ) 7335 7336 def _parse_when_matched(self) -> exp.Whens: 7337 whens = [] 7338 7339 while self._match(TokenType.WHEN): 7340 matched = not self._match(TokenType.NOT) 7341 self._match_text_seq("MATCHED") 7342 source = ( 7343 False 7344 if self._match_text_seq("BY", "TARGET") 7345 else self._match_text_seq("BY", "SOURCE") 7346 ) 7347 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7348 7349 self._match(TokenType.THEN) 7350 7351 if self._match(TokenType.INSERT): 7352 this = self._parse_star() 7353 if this: 7354 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7355 else: 7356 then = self.expression( 7357 exp.Insert, 7358 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7359 expression=self._match_text_seq("VALUES") and self._parse_value(), 7360 ) 7361 elif self._match(TokenType.UPDATE): 7362 expressions = self._parse_star() 7363 if expressions: 7364 then = self.expression(exp.Update, expressions=expressions) 7365 else: 7366 then = self.expression( 7367 exp.Update, 7368 expressions=self._match(TokenType.SET) 7369 and 
self._parse_csv(self._parse_equality), 7370 ) 7371 elif self._match(TokenType.DELETE): 7372 then = self.expression(exp.Var, this=self._prev.text) 7373 else: 7374 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7375 7376 whens.append( 7377 self.expression( 7378 exp.When, 7379 matched=matched, 7380 source=source, 7381 condition=condition, 7382 then=then, 7383 ) 7384 ) 7385 return self.expression(exp.Whens, expressions=whens) 7386 7387 def _parse_show(self) -> t.Optional[exp.Expression]: 7388 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7389 if parser: 7390 return parser(self) 7391 return self._parse_as_command(self._prev) 7392 7393 def _parse_set_item_assignment( 7394 self, kind: t.Optional[str] = None 7395 ) -> t.Optional[exp.Expression]: 7396 index = self._index 7397 7398 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7399 return self._parse_set_transaction(global_=kind == "GLOBAL") 7400 7401 left = self._parse_primary() or self._parse_column() 7402 assignment_delimiter = self._match_texts(("=", "TO")) 7403 7404 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7405 self._retreat(index) 7406 return None 7407 7408 right = self._parse_statement() or self._parse_id_var() 7409 if isinstance(right, (exp.Column, exp.Identifier)): 7410 right = exp.var(right.name) 7411 7412 this = self.expression(exp.EQ, this=left, expression=right) 7413 return self.expression(exp.SetItem, this=this, kind=kind) 7414 7415 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7416 self._match_text_seq("TRANSACTION") 7417 characteristics = self._parse_csv( 7418 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7419 ) 7420 return self.expression( 7421 exp.SetItem, 7422 expressions=characteristics, 7423 kind="TRANSACTION", 7424 **{"global": global_}, # type: ignore 7425 ) 7426 7427 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7428 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7429 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7430 7431 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7432 index = self._index 7433 set_ = self.expression( 7434 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7435 ) 7436 7437 if self._curr: 7438 self._retreat(index) 7439 return self._parse_as_command(self._prev) 7440 7441 return set_ 7442 7443 def _parse_var_from_options( 7444 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7445 ) -> t.Optional[exp.Var]: 7446 start = self._curr 7447 if not start: 7448 return None 7449 7450 option = start.text.upper() 7451 continuations = options.get(option) 7452 7453 index = self._index 7454 self._advance() 7455 for keywords in continuations or []: 7456 if isinstance(keywords, str): 7457 keywords = (keywords,) 7458 7459 if self._match_text_seq(*keywords): 7460 option = f"{option} {' '.join(keywords)}" 7461 break 7462 else: 7463 if continuations or continuations is None: 7464 if raise_unmatched: 7465 self.raise_error(f"Unknown option {option}") 7466 7467 self._retreat(index) 7468 return None 7469 7470 return exp.var(option) 7471 7472 def _parse_as_command(self, start: Token) -> exp.Command: 7473 while self._curr: 7474 self._advance() 7475 text = self._find_sql(start, self._prev) 7476 size = len(start.text) 7477 self._warn_unsupported() 7478 return exp.Command(this=text[:size], expression=text[size:]) 7479 7480 def _parse_dict_property(self, 
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
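The _find_parser helper above walks multi-word keywords through a trie one token at a time, continuing from the node returned by each lookup until the walk either fails or lands on a complete key. A minimal sketch of that lookup using the sqlglot.trie helpers directly (the keyword list is illustrative, not one the parser actually registers):

    from sqlglot.trie import TrieResult, in_trie, new_trie

    # Build a trie over two hypothetical multi-word keywords.
    trie = new_trie([("PRIMARY", "KEY"), ("FOREIGN", "KEY")])

    # Walk it word by word, the way _find_parser does.
    result, node = in_trie(trie, ("PRIMARY",))
    assert result == TrieResult.PREFIX  # "PRIMARY" alone is only a prefix

    result, node = in_trie(node, ("KEY",))
    assert result == TrieResult.EXISTS  # "PRIMARY KEY" is a complete key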
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
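For orientation, a minimal sketch of driving the Parser by hand (the SQL string is illustrative; in practice sqlglot.parse_one is the usual entry point):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)  # produce the token list for the parser

    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=1)
    expressions = parser.parse(tokens, sql=sql)  # one syntax tree per statement
    print(expressions[0].sql())  # SELECT a FROM t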
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
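Because parse returns one tree per statement, a semicolon-separated script yields multiple entries; a small sketch (statements are illustrative):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    assert len(trees) == 2  # one syntax tree per statement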
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
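A sketch of parse_into, assuming exp.Select is among the types registered in EXPRESSION_PARSERS; a token list that cannot be parsed into any of the given types raises a ParseError that records the attempted into_expression:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    trees = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)
    assert isinstance(trees[0], exp.Select)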
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
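How raise_error and check_errors interact depends on the error level; a minimal sketch (no tokens are loaded here, so the error location falls back to an empty token):

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    p = Parser(error_level=ErrorLevel.WARN)
    p.raise_error("Example error")  # WARN: recorded, not raised
    assert len(p.errors) == 1
    p.check_errors()                # WARN: each error is logged via the sqlglot logger

    p = Parser(error_level=ErrorLevel.IMMEDIATE)
    try:
        p.raise_error("Example error")  # IMMEDIATE: raised on the spot
    except ParseError as e:
        print(e.errors[0]["description"])  # Example error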
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
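A sketch of building a node through expression, which attaches any pending comments and validates the result before returning it (the identifier is illustrative):

    from sqlglot import exp
    from sqlglot.parser import Parser

    p = Parser()
    col = p.expression(exp.Column, this=exp.to_identifier("a"))
    print(col.sql())  # a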
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
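Validation consults each expression class's declared argument types; a sketch where a mandatory argument is left unset (exp.Cast requires both this and to), so the default IMMEDIATE error level raises:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    p = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE
    try:
        p.validate_expression(exp.Cast(this=exp.column("x")))  # missing mandatory `to`
    except ParseError as e:
        print(e)  # reports the missing argument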