sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be folded into LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be folded into Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
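
# Illustrative note, not part of the original module: these builders receive the
# already-parsed argument list of a function call and return an AST node. For
# example, assuming the default dialect (where LOG_BASE_FIRST is True),
# LOG(2, 8) is built as exp.Log(this=2, expression=8), while build_like reverses
# the incoming argument order and wraps the result in exp.Escape when a third
# argument is present. A rough sketch:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("LOG(2, 8)").sql()
#   'LOG(2, 8)'
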
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }
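
    # Illustrative note, not part of the original module: the operator tables
    # here (CONJUNCTION, DISJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM,
    # FACTOR) drive precedence climbing, so "a OR b AND c" parses with AND
    # binding tighter than OR, i.e. the root node is exp.Or and its right-hand
    # side is an exp.And:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("a OR b AND c").sql()
    #   'a OR b AND c'
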
    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
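
    # Illustrative note, not part of the original module: COLUMN_OPERATORS
    # extends a parsed column in a postfix loop, e.g. "x::INT" becomes exp.Cast
    # (or exp.TryCast when STRICT_CAST is False), and in dialects where -> and
    # ->> operate on JSON documents, "doc -> '$.k'" becomes exp.JSONExtract:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("x::INT", read="postgres").sql()
    #   'CAST(x AS INT)'
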
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
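
    # Illustrative note, not part of the original module: _parse_statement
    # dispatches on the first token of each statement through STATEMENT_PARSERS,
    # so CREATE is routed to _parse_create, INSERT to _parse_insert, and so on;
    # tokens without an entry fall through to expression/SELECT parsing, and
    # dialect-specific command tokens are parsed as opaque exp.Command nodes.
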
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
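
    # Illustrative note, not part of the original module: RANGE_PARSERS handles
    # postfix predicates, e.g. "x BETWEEN 1 AND 2" via _parse_between, and
    # "x LIKE 'a%' ESCAPE '!'" via binary_range_parser(exp.Like), whose result
    # is wrapped in exp.Escape when an ESCAPE clause follows.
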
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
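
    # Illustrative note, not part of the original module: CONSTRAINT_PARSERS
    # below is keyed by the keyword(s) that introduce a column or table
    # constraint, so a definition such as "id INT NOT NULL DEFAULT 0" produces
    # an exp.ColumnDef carrying a NotNullColumnConstraint and a
    # DefaultColumnConstraint.
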
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
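
    # Illustrative note, not part of the original module: FUNCTION_PARSERS
    # covers functions whose call syntax is not a plain argument list, e.g.
    # CAST(x AS INT), EXTRACT(YEAR FROM d) or TRIM(BOTH ' ' FROM s), each of
    # which gets a dedicated _parse_* method instead of a generic builder.
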
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}
"SCHEMA"), tuple()) 1117 1118 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1119 1120 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1121 1122 CLONE_KEYWORDS = {"CLONE", "COPY"} 1123 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1124 1125 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1126 1127 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1128 1129 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1130 1131 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1132 1133 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1134 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1135 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1136 1137 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1138 1139 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1140 1141 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1142 1143 DISTINCT_TOKENS = {TokenType.DISTINCT} 1144 1145 NULL_TOKENS = {TokenType.NULL} 1146 1147 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1148 1149 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1150 1151 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1152 1153 STRICT_CAST = True 1154 1155 PREFIXED_PIVOT_COLUMNS = False 1156 IDENTIFY_PIVOT_STRINGS = False 1157 1158 LOG_DEFAULTS_TO_LN = False 1159 1160 # Whether ADD is present for each column added by ALTER TABLE 1161 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1162 1163 # Whether the table sample clause expects CSV syntax 1164 TABLESAMPLE_CSV = False 1165 1166 # The default method used for table sampling 1167 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1168 1169 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1170 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1171 1172 # Whether the TRIM function expects the characters to trim as its first argument 1173 TRIM_PATTERN_FIRST = False 1174 1175 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1176 STRING_ALIASES = False 1177 1178 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1179 MODIFIERS_ATTACHED_TO_UNION = True 1180 UNION_MODIFIERS = {"order", "limit", "offset"} 1181 1182 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1183 NO_PAREN_IF_COMMANDS = True 1184 1185 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1186 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1187 1188 # Whether the `:` operator is used to extract a value from a JSON document 1189 COLON_IS_JSON_EXTRACT = False 1190 1191 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1192 # If this is True and '(' is not found, the keyword will be treated as an identifier 1193 VALUES_FOLLOWED_BY_PAREN = True 1194 1195 # Whether implicit unnesting is supported, e.g. 
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
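
    # Illustrative usage, not part of the original module, assuming the default
    # dialect's tokenizer:
    #
    #   >>> from sqlglot.tokens import Tokenizer
    #   >>> sql = "SELECT a FROM t; SELECT b FROM t"
    #   >>> Parser().parse(Tokenizer().tokenize(sql), sql)
    #   [Select(...), Select(...)]   # one tree per statement, reprs abbreviated
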
1284 """ 1285 errors = [] 1286 for expression_type in ensure_list(expression_types): 1287 parser = self.EXPRESSION_PARSERS.get(expression_type) 1288 if not parser: 1289 raise TypeError(f"No parser registered for {expression_type}") 1290 1291 try: 1292 return self._parse(parser, raw_tokens, sql) 1293 except ParseError as e: 1294 e.errors[0]["into_expression"] = expression_type 1295 errors.append(e) 1296 1297 raise ParseError( 1298 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1299 errors=merge_errors(errors), 1300 ) from errors[-1] 1301 1302 def _parse( 1303 self, 1304 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1305 raw_tokens: t.List[Token], 1306 sql: t.Optional[str] = None, 1307 ) -> t.List[t.Optional[exp.Expression]]: 1308 self.reset() 1309 self.sql = sql or "" 1310 1311 total = len(raw_tokens) 1312 chunks: t.List[t.List[Token]] = [[]] 1313 1314 for i, token in enumerate(raw_tokens): 1315 if token.token_type == TokenType.SEMICOLON: 1316 if token.comments: 1317 chunks.append([token]) 1318 1319 if i < total - 1: 1320 chunks.append([]) 1321 else: 1322 chunks[-1].append(token) 1323 1324 expressions = [] 1325 1326 for tokens in chunks: 1327 self._index = -1 1328 self._tokens = tokens 1329 self._advance() 1330 1331 expressions.append(parse_method(self)) 1332 1333 if self._index < len(self._tokens): 1334 self.raise_error("Invalid expression / Unexpected token") 1335 1336 self.check_errors() 1337 1338 return expressions 1339 1340 def check_errors(self) -> None: 1341 """Logs or raises any found errors, depending on the chosen error level setting.""" 1342 if self.error_level == ErrorLevel.WARN: 1343 for error in self.errors: 1344 logger.error(str(error)) 1345 elif self.error_level == ErrorLevel.RAISE and self.errors: 1346 raise ParseError( 1347 concat_messages(self.errors, self.max_errors), 1348 errors=merge_errors(self.errors), 1349 ) 1350 1351 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1352 """ 1353 Appends an error in the list of recorded errors or raises it, depending on the chosen 1354 error level setting. 1355 """ 1356 token = token or self._curr or self._prev or Token.string("") 1357 start = token.start 1358 end = token.end + 1 1359 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1360 highlight = self.sql[start:end] 1361 end_context = self.sql[end : end + self.error_message_context] 1362 1363 error = ParseError.new( 1364 f"{message}. Line {token.line}, Col: {token.col}.\n" 1365 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1366 description=message, 1367 line=token.line, 1368 col=token.col, 1369 start_context=start_context, 1370 highlight=highlight, 1371 end_context=end_context, 1372 ) 1373 1374 if self.error_level == ErrorLevel.IMMEDIATE: 1375 raise error 1376 1377 self.errors.append(error) 1378 1379 def expression( 1380 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1381 ) -> E: 1382 """ 1383 Creates a new, validated Expression. 1384 1385 Args: 1386 exp_class: The expression class to instantiate. 1387 comments: An optional list of comments to attach to the expression. 1388 kwargs: The arguments to set for the expression along with their respective values. 1389 1390 Returns: 1391 The target expression. 
1392 """ 1393 instance = exp_class(**kwargs) 1394 instance.add_comments(comments) if comments else self._add_comments(instance) 1395 return self.validate_expression(instance) 1396 1397 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1398 if expression and self._prev_comments: 1399 expression.add_comments(self._prev_comments) 1400 self._prev_comments = None 1401 1402 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1403 """ 1404 Validates an Expression, making sure that all its mandatory arguments are set. 1405 1406 Args: 1407 expression: The expression to validate. 1408 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1409 1410 Returns: 1411 The validated expression. 1412 """ 1413 if self.error_level != ErrorLevel.IGNORE: 1414 for error_message in expression.error_messages(args): 1415 self.raise_error(error_message) 1416 1417 return expression 1418 1419 def _find_sql(self, start: Token, end: Token) -> str: 1420 return self.sql[start.start : end.end + 1] 1421 1422 def _is_connected(self) -> bool: 1423 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1424 1425 def _advance(self, times: int = 1) -> None: 1426 self._index += times 1427 self._curr = seq_get(self._tokens, self._index) 1428 self._next = seq_get(self._tokens, self._index + 1) 1429 1430 if self._index > 0: 1431 self._prev = self._tokens[self._index - 1] 1432 self._prev_comments = self._prev.comments 1433 else: 1434 self._prev = None 1435 self._prev_comments = None 1436 1437 def _retreat(self, index: int) -> None: 1438 if index != self._index: 1439 self._advance(index - self._index) 1440 1441 def _warn_unsupported(self) -> None: 1442 if len(self._tokens) <= 1: 1443 return 1444 1445 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1446 # interested in emitting a warning for the one being currently processed. 1447 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1448 1449 logger.warning( 1450 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1451 ) 1452 1453 def _parse_command(self) -> exp.Command: 1454 self._warn_unsupported() 1455 return self.expression( 1456 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1457 ) 1458 1459 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1460 """ 1461 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1674 expression = self._parse_string() 1675 extend_props(self._parse_properties()) 1676 else: 1677 expression = self._parse_statement() 1678 1679 end = self._match_text_seq("END") 1680 1681 if return_: 1682 expression = self.expression(exp.Return, this=expression) 1683 elif create_token.token_type == TokenType.INDEX: 1684 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1685 if not self._match(TokenType.ON): 1686 index = self._parse_id_var() 1687 anonymous = False 1688 else: 1689 index = None 1690 anonymous = True 1691 1692 this = self._parse_index(index=index, anonymous=anonymous) 1693 elif create_token.token_type in self.DB_CREATABLES: 1694 table_parts = self._parse_table_parts( 1695 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1696 ) 1697 1698 # exp.Properties.Location.POST_NAME 1699 self._match(TokenType.COMMA) 1700 extend_props(self._parse_properties(before=True)) 1701 1702 this = self._parse_schema(this=table_parts) 1703 1704 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1705 extend_props(self._parse_properties()) 1706 1707 self._match(TokenType.ALIAS) 1708 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1709 # exp.Properties.Location.POST_ALIAS 1710 extend_props(self._parse_properties()) 1711 1712 if create_token.token_type == TokenType.SEQUENCE: 1713 expression = self._parse_types() 1714 extend_props(self._parse_properties()) 1715 else: 1716 expression = self._parse_ddl_select() 1717 1718 if create_token.token_type == TokenType.TABLE: 1719 # exp.Properties.Location.POST_EXPRESSION 1720 extend_props(self._parse_properties()) 1721 1722 indexes = [] 1723 while True: 1724 index = self._parse_index() 1725 1726 # exp.Properties.Location.POST_INDEX 1727 extend_props(self._parse_properties()) 1728 1729 if not index: 1730 break 1731 else: 1732 self._match(TokenType.COMMA) 1733 indexes.append(index) 1734 elif create_token.token_type == TokenType.VIEW: 1735 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1736 no_schema_binding = True 1737 1738 shallow = self._match_text_seq("SHALLOW") 1739 1740 if self._match_texts(self.CLONE_KEYWORDS): 1741 copy = self._prev.text.lower() == "copy" 1742 clone = self.expression( 1743 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1744 ) 1745 1746 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1747 return self._parse_as_command(start) 1748 1749 return self.expression( 1750 exp.Create, 1751 comments=comments, 1752 this=this, 1753 kind=create_token.text.upper(), 1754 replace=replace, 1755 unique=unique, 1756 expression=expression, 1757 exists=exists, 1758 properties=properties, 1759 indexes=indexes, 1760 no_schema_binding=no_schema_binding, 1761 begin=begin, 1762 end=end, 1763 clone=clone, 1764 ) 1765 1766 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1767 seq = exp.SequenceProperties() 1768 1769 options = [] 1770 index = self._index 1771 1772 while self._curr: 1773 self._match(TokenType.COMMA) 1774 if self._match_text_seq("INCREMENT"): 1775 self._match_text_seq("BY") 1776 self._match_text_seq("=") 1777 seq.set("increment", self._parse_term()) 1778 elif self._match_text_seq("MINVALUE"): 1779 seq.set("minvalue", self._parse_term()) 1780 elif self._match_text_seq("MAXVALUE"): 1781 seq.set("maxvalue", self._parse_term()) 1782 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1783 self._match_text_seq("=") 1784 seq.set("start", self._parse_term()) 1785 elif self._match_text_seq("CACHE"): 1786 # T-SQL allows empty CACHE which is initialized dynamically 1787 seq.set("cache", self._parse_number() or True) 1788 elif self._match_text_seq("OWNED", "BY"): 1789 # "OWNED BY NONE" is the default 1790 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1791 else: 1792 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1793 if opt: 1794 options.append(opt) 1795 else: 1796 break 1797 1798 seq.set("options", options if options else None) 1799 return None if self._index == index else seq 1800 1801 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1802 # only used for teradata currently 1803 self._match(TokenType.COMMA) 1804 1805 kwargs = { 1806 "no": self._match_text_seq("NO"), 1807 "dual": self._match_text_seq("DUAL"), 1808 "before": self._match_text_seq("BEFORE"), 1809 "default": self._match_text_seq("DEFAULT"), 1810 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1811 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1812 "after": self._match_text_seq("AFTER"), 1813 "minimum": self._match_texts(("MIN", "MINIMUM")), 1814 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1815 } 1816 1817 if self._match_texts(self.PROPERTY_PARSERS): 1818 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1819 try: 1820 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1821 except TypeError: 1822 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1823 1824 return None 1825 1826 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1827 return self._parse_wrapped_csv(self._parse_property) 1828 1829 def _parse_property(self) -> t.Optional[exp.Expression]: 1830 if self._match_texts(self.PROPERTY_PARSERS): 1831 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1832 1833 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1834 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1835 1836 if self._match_text_seq("COMPOUND", "SORTKEY"): 1837 return self._parse_sortkey(compound=True) 1838 1839 if self._match_text_seq("SQL", "SECURITY"): 1840 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1841 1842 index = self._index 1843 key = self._parse_column() 1844 1845 if not self._match(TokenType.EQ): 1846 self._retreat(index) 1847 return self._parse_sequence_properties() 1848 1849 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1850 if isinstance(key, exp.Column): 1851 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1852 1853 value = self._parse_bitwise() or self._parse_var(any_token=True) 1854 1855 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1856 if isinstance(value, exp.Column): 1857 value = exp.var(value.name) 1858 1859 return self.expression(exp.Property, this=key, value=value) 1860 1861 def _parse_stored(self) -> exp.FileFormatProperty: 1862 self._match(TokenType.ALIAS) 1863 1864 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1865 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1866 1867 return self.expression( 1868 exp.FileFormatProperty, 1869 this=( 1870 self.expression( 1871 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1872 ) 1873 if 
input_format or output_format 1874 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1875 ), 1876 ) 1877 1878 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1879 field = self._parse_field() 1880 if isinstance(field, exp.Identifier) and not field.quoted: 1881 field = exp.var(field) 1882 1883 return field 1884 1885 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1886 self._match(TokenType.EQ) 1887 self._match(TokenType.ALIAS) 1888 1889 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1890 1891 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1892 properties = [] 1893 while True: 1894 if before: 1895 prop = self._parse_property_before() 1896 else: 1897 prop = self._parse_property() 1898 if not prop: 1899 break 1900 for p in ensure_list(prop): 1901 properties.append(p) 1902 1903 if properties: 1904 return self.expression(exp.Properties, expressions=properties) 1905 1906 return None 1907 1908 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1909 return self.expression( 1910 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1911 ) 1912 1913 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1914 if self._index >= 2: 1915 pre_volatile_token = self._tokens[self._index - 2] 1916 else: 1917 pre_volatile_token = None 1918 1919 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1920 return exp.VolatileProperty() 1921 1922 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1923 1924 def _parse_retention_period(self) -> exp.Var: 1925 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1926 number = self._parse_number() 1927 number_str = f"{number} " if number else "" 1928 unit = self._parse_var(any_token=True) 1929 return exp.var(f"{number_str}{unit}") 1930 1931 def _parse_system_versioning_property( 1932 self, with_: bool = False 1933 ) -> exp.WithSystemVersioningProperty: 1934 self._match(TokenType.EQ) 1935 prop = self.expression( 1936 exp.WithSystemVersioningProperty, 1937 **{ # type: ignore 1938 "on": True, 1939 "with": with_, 1940 }, 1941 ) 1942 1943 if self._match_text_seq("OFF"): 1944 prop.set("on", False) 1945 return prop 1946 1947 self._match(TokenType.ON) 1948 if self._match(TokenType.L_PAREN): 1949 while self._curr and not self._match(TokenType.R_PAREN): 1950 if self._match_text_seq("HISTORY_TABLE", "="): 1951 prop.set("this", self._parse_table_parts()) 1952 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1953 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1954 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1955 prop.set("retention_period", self._parse_retention_period()) 1956 1957 self._match(TokenType.COMMA) 1958 1959 return prop 1960 1961 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1962 self._match(TokenType.EQ) 1963 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1964 prop = self.expression(exp.DataDeletionProperty, on=on) 1965 1966 if self._match(TokenType.L_PAREN): 1967 while self._curr and not self._match(TokenType.R_PAREN): 1968 if self._match_text_seq("FILTER_COLUMN", "="): 1969 prop.set("filter_column", self._parse_column()) 1970 elif self._match_text_seq("RETENTION_PERIOD", "="): 1971 prop.set("retention_period", self._parse_retention_period()) 1972 1973 
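# -- Added illustrative example; not part of the original sqlglot source. --
# A doctest-style sketch of how the SYSTEM_VERSIONING machinery above surfaces
# in a parsed tree. It assumes only the public sqlglot.parse_one API, a
# hypothetical table name, and that the tsql dialect accepts this DDL form
# (details may vary by sqlglot version):
#
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> ddl = sqlglot.parse_one(
# ...     "CREATE TABLE t (id INT) WITH (SYSTEM_VERSIONING = ON)", read="tsql"
# ... )
# >>> ddl.find(exp.WithSystemVersioningProperty).args.get("on")
# True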
self._match(TokenType.COMMA) 1974 1975 return prop 1976 1977 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1978 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1979 prop = self._parse_system_versioning_property(with_=True) 1980 self._match_r_paren() 1981 return prop 1982 1983 if self._match(TokenType.L_PAREN, advance=False): 1984 return self._parse_wrapped_properties() 1985 1986 if self._match_text_seq("JOURNAL"): 1987 return self._parse_withjournaltable() 1988 1989 if self._match_texts(self.VIEW_ATTRIBUTES): 1990 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1991 1992 if self._match_text_seq("DATA"): 1993 return self._parse_withdata(no=False) 1994 elif self._match_text_seq("NO", "DATA"): 1995 return self._parse_withdata(no=True) 1996 1997 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1998 return self._parse_serde_properties(with_=True) 1999 2000 if not self._next: 2001 return None 2002 2003 return self._parse_withisolatedloading() 2004 2005 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2006 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2007 self._match(TokenType.EQ) 2008 2009 user = self._parse_id_var() 2010 self._match(TokenType.PARAMETER) 2011 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2012 2013 if not user or not host: 2014 return None 2015 2016 return exp.DefinerProperty(this=f"{user}@{host}") 2017 2018 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2019 self._match(TokenType.TABLE) 2020 self._match(TokenType.EQ) 2021 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2022 2023 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2024 return self.expression(exp.LogProperty, no=no) 2025 2026 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2027 return self.expression(exp.JournalProperty, **kwargs) 2028 2029 def _parse_checksum(self) -> exp.ChecksumProperty: 2030 self._match(TokenType.EQ) 2031 2032 on = None 2033 if self._match(TokenType.ON): 2034 on = True 2035 elif self._match_text_seq("OFF"): 2036 on = False 2037 2038 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2039 2040 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2041 return self.expression( 2042 exp.Cluster, 2043 expressions=( 2044 self._parse_wrapped_csv(self._parse_ordered) 2045 if wrapped 2046 else self._parse_csv(self._parse_ordered) 2047 ), 2048 ) 2049 2050 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2051 self._match_text_seq("BY") 2052 2053 self._match_l_paren() 2054 expressions = self._parse_csv(self._parse_column) 2055 self._match_r_paren() 2056 2057 if self._match_text_seq("SORTED", "BY"): 2058 self._match_l_paren() 2059 sorted_by = self._parse_csv(self._parse_ordered) 2060 self._match_r_paren() 2061 else: 2062 sorted_by = None 2063 2064 self._match(TokenType.INTO) 2065 buckets = self._parse_number() 2066 self._match_text_seq("BUCKETS") 2067 2068 return self.expression( 2069 exp.ClusteredByProperty, 2070 expressions=expressions, 2071 sorted_by=sorted_by, 2072 buckets=buckets, 2073 ) 2074 2075 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2076 if not self._match_text_seq("GRANTS"): 2077 self._retreat(self._index - 1) 2078 return None 2079 2080 return self.expression(exp.CopyGrantsProperty) 2081 2082 def _parse_freespace(self) -> exp.FreespaceProperty: 2083 self._match(TokenType.EQ) 2084 return 
self.expression( 2085 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2086 ) 2087 2088 def _parse_mergeblockratio( 2089 self, no: bool = False, default: bool = False 2090 ) -> exp.MergeBlockRatioProperty: 2091 if self._match(TokenType.EQ): 2092 return self.expression( 2093 exp.MergeBlockRatioProperty, 2094 this=self._parse_number(), 2095 percent=self._match(TokenType.PERCENT), 2096 ) 2097 2098 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2099 2100 def _parse_datablocksize( 2101 self, 2102 default: t.Optional[bool] = None, 2103 minimum: t.Optional[bool] = None, 2104 maximum: t.Optional[bool] = None, 2105 ) -> exp.DataBlocksizeProperty: 2106 self._match(TokenType.EQ) 2107 size = self._parse_number() 2108 2109 units = None 2110 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2111 units = self._prev.text 2112 2113 return self.expression( 2114 exp.DataBlocksizeProperty, 2115 size=size, 2116 units=units, 2117 default=default, 2118 minimum=minimum, 2119 maximum=maximum, 2120 ) 2121 2122 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2123 self._match(TokenType.EQ) 2124 always = self._match_text_seq("ALWAYS") 2125 manual = self._match_text_seq("MANUAL") 2126 never = self._match_text_seq("NEVER") 2127 default = self._match_text_seq("DEFAULT") 2128 2129 autotemp = None 2130 if self._match_text_seq("AUTOTEMP"): 2131 autotemp = self._parse_schema() 2132 2133 return self.expression( 2134 exp.BlockCompressionProperty, 2135 always=always, 2136 manual=manual, 2137 never=never, 2138 default=default, 2139 autotemp=autotemp, 2140 ) 2141 2142 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2143 index = self._index 2144 no = self._match_text_seq("NO") 2145 concurrent = self._match_text_seq("CONCURRENT") 2146 2147 if not self._match_text_seq("ISOLATED", "LOADING"): 2148 self._retreat(index) 2149 return None 2150 2151 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2152 return self.expression( 2153 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2154 ) 2155 2156 def _parse_locking(self) -> exp.LockingProperty: 2157 if self._match(TokenType.TABLE): 2158 kind = "TABLE" 2159 elif self._match(TokenType.VIEW): 2160 kind = "VIEW" 2161 elif self._match(TokenType.ROW): 2162 kind = "ROW" 2163 elif self._match_text_seq("DATABASE"): 2164 kind = "DATABASE" 2165 else: 2166 kind = None 2167 2168 if kind in ("DATABASE", "TABLE", "VIEW"): 2169 this = self._parse_table_parts() 2170 else: 2171 this = None 2172 2173 if self._match(TokenType.FOR): 2174 for_or_in = "FOR" 2175 elif self._match(TokenType.IN): 2176 for_or_in = "IN" 2177 else: 2178 for_or_in = None 2179 2180 if self._match_text_seq("ACCESS"): 2181 lock_type = "ACCESS" 2182 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2183 lock_type = "EXCLUSIVE" 2184 elif self._match_text_seq("SHARE"): 2185 lock_type = "SHARE" 2186 elif self._match_text_seq("READ"): 2187 lock_type = "READ" 2188 elif self._match_text_seq("WRITE"): 2189 lock_type = "WRITE" 2190 elif self._match_text_seq("CHECKSUM"): 2191 lock_type = "CHECKSUM" 2192 else: 2193 lock_type = None 2194 2195 override = self._match_text_seq("OVERRIDE") 2196 2197 return self.expression( 2198 exp.LockingProperty, 2199 this=this, 2200 kind=kind, 2201 for_or_in=for_or_in, 2202 lock_type=lock_type, 2203 override=override, 2204 ) 2205 2206 def _parse_partition_by(self) -> t.List[exp.Expression]: 2207 if 
self._match(TokenType.PARTITION_BY): 2208 return self._parse_csv(self._parse_assignment) 2209 return [] 2210 2211 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2212 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2213 if self._match_text_seq("MINVALUE"): 2214 return exp.var("MINVALUE") 2215 if self._match_text_seq("MAXVALUE"): 2216 return exp.var("MAXVALUE") 2217 return self._parse_bitwise() 2218 2219 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2220 expression = None 2221 from_expressions = None 2222 to_expressions = None 2223 2224 if self._match(TokenType.IN): 2225 this = self._parse_wrapped_csv(self._parse_bitwise) 2226 elif self._match(TokenType.FROM): 2227 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2228 self._match_text_seq("TO") 2229 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2230 elif self._match_text_seq("WITH", "(", "MODULUS"): 2231 this = self._parse_number() 2232 self._match_text_seq(",", "REMAINDER") 2233 expression = self._parse_number() 2234 self._match_r_paren() 2235 else: 2236 self.raise_error("Failed to parse partition bound spec.") 2237 2238 return self.expression( 2239 exp.PartitionBoundSpec, 2240 this=this, 2241 expression=expression, 2242 from_expressions=from_expressions, 2243 to_expressions=to_expressions, 2244 ) 2245 2246 # https://www.postgresql.org/docs/current/sql-createtable.html 2247 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2248 if not self._match_text_seq("OF"): 2249 self._retreat(self._index - 1) 2250 return None 2251 2252 this = self._parse_table(schema=True) 2253 2254 if self._match(TokenType.DEFAULT): 2255 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2256 elif self._match_text_seq("FOR", "VALUES"): 2257 expression = self._parse_partition_bound_spec() 2258 else: 2259 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2260 2261 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2262 2263 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2264 self._match(TokenType.EQ) 2265 return self.expression( 2266 exp.PartitionedByProperty, 2267 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2268 ) 2269 2270 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2271 if self._match_text_seq("AND", "STATISTICS"): 2272 statistics = True 2273 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2274 statistics = False 2275 else: 2276 statistics = None 2277 2278 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2279 2280 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2281 if self._match_text_seq("SQL"): 2282 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2283 return None 2284 2285 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2286 if self._match_text_seq("SQL", "DATA"): 2287 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2288 return None 2289 2290 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2291 if self._match_text_seq("PRIMARY", "INDEX"): 2292 return exp.NoPrimaryIndexProperty() 2293 if self._match_text_seq("SQL"): 2294 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2295 return None 2296 2297 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2298 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2299 return exp.OnCommitProperty() 2300 
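# -- Added illustrative example; not part of the original sqlglot source. --
# A hedged sketch of the partition-bound parsing above (_parse_partitioned_of /
# _parse_partition_bound_spec). Table names are hypothetical, and it assumes
# the postgres dialect accepts this DDL form in your sqlglot version:
#
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> ddl = sqlglot.parse_one(
# ...     "CREATE TABLE m PARTITION OF t FOR VALUES FROM (1) TO (10)", read="postgres"
# ... )
# >>> ddl.find(exp.PartitionBoundSpec) is not None
# True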
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2301 return exp.OnCommitProperty(delete=True) 2302 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2303 2304 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2305 if self._match_text_seq("SQL", "DATA"): 2306 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2307 return None 2308 2309 def _parse_distkey(self) -> exp.DistKeyProperty: 2310 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2311 2312 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2313 table = self._parse_table(schema=True) 2314 2315 options = [] 2316 while self._match_texts(("INCLUDING", "EXCLUDING")): 2317 this = self._prev.text.upper() 2318 2319 id_var = self._parse_id_var() 2320 if not id_var: 2321 return None 2322 2323 options.append( 2324 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2325 ) 2326 2327 return self.expression(exp.LikeProperty, this=table, expressions=options) 2328 2329 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2330 return self.expression( 2331 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2332 ) 2333 2334 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2335 self._match(TokenType.EQ) 2336 return self.expression( 2337 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2338 ) 2339 2340 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2341 self._match_text_seq("WITH", "CONNECTION") 2342 return self.expression( 2343 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2344 ) 2345 2346 def _parse_returns(self) -> exp.ReturnsProperty: 2347 value: t.Optional[exp.Expression] 2348 null = None 2349 is_table = self._match(TokenType.TABLE) 2350 2351 if is_table: 2352 if self._match(TokenType.LT): 2353 value = self.expression( 2354 exp.Schema, 2355 this="TABLE", 2356 expressions=self._parse_csv(self._parse_struct_types), 2357 ) 2358 if not self._match(TokenType.GT): 2359 self.raise_error("Expecting >") 2360 else: 2361 value = self._parse_schema(exp.var("TABLE")) 2362 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2363 null = True 2364 value = None 2365 else: 2366 value = self._parse_types() 2367 2368 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2369 2370 def _parse_describe(self) -> exp.Describe: 2371 kind = self._match_set(self.CREATABLES) and self._prev.text 2372 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2373 if self._match(TokenType.DOT): 2374 style = None 2375 self._retreat(self._index - 2) 2376 this = self._parse_table(schema=True) 2377 properties = self._parse_properties() 2378 expressions = properties.expressions if properties else None 2379 return self.expression( 2380 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2381 ) 2382 2383 def _parse_insert(self) -> exp.Insert: 2384 comments = ensure_list(self._prev_comments) 2385 hint = self._parse_hint() 2386 overwrite = self._match(TokenType.OVERWRITE) 2387 ignore = self._match(TokenType.IGNORE) 2388 local = self._match_text_seq("LOCAL") 2389 alternative = None 2390 is_function = None 2391 2392 if self._match_text_seq("DIRECTORY"): 2393 this: t.Optional[exp.Expression] = self.expression( 2394 exp.Directory, 2395 this=self._parse_var_or_string(), 2396 
local=local, 2397 row_format=self._parse_row_format(match_row=True), 2398 ) 2399 else: 2400 if self._match(TokenType.OR): 2401 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2402 2403 self._match(TokenType.INTO) 2404 comments += ensure_list(self._prev_comments) 2405 self._match(TokenType.TABLE) 2406 is_function = self._match(TokenType.FUNCTION) 2407 2408 this = ( 2409 self._parse_table(schema=True, parse_partition=True) 2410 if not is_function 2411 else self._parse_function() 2412 ) 2413 2414 returning = self._parse_returning() 2415 2416 return self.expression( 2417 exp.Insert, 2418 comments=comments, 2419 hint=hint, 2420 is_function=is_function, 2421 this=this, 2422 stored=self._match_text_seq("STORED") and self._parse_stored(), 2423 by_name=self._match_text_seq("BY", "NAME"), 2424 exists=self._parse_exists(), 2425 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2426 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2427 conflict=self._parse_on_conflict(), 2428 returning=returning or self._parse_returning(), 2429 overwrite=overwrite, 2430 alternative=alternative, 2431 ignore=ignore, 2432 ) 2433 2434 def _parse_kill(self) -> exp.Kill: 2435 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2436 2437 return self.expression( 2438 exp.Kill, 2439 this=self._parse_primary(), 2440 kind=kind, 2441 ) 2442 2443 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2444 conflict = self._match_text_seq("ON", "CONFLICT") 2445 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2446 2447 if not conflict and not duplicate: 2448 return None 2449 2450 conflict_keys = None 2451 constraint = None 2452 2453 if conflict: 2454 if self._match_text_seq("ON", "CONSTRAINT"): 2455 constraint = self._parse_id_var() 2456 elif self._match(TokenType.L_PAREN): 2457 conflict_keys = self._parse_csv(self._parse_id_var) 2458 self._match_r_paren() 2459 2460 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2461 if self._prev.token_type == TokenType.UPDATE: 2462 self._match(TokenType.SET) 2463 expressions = self._parse_csv(self._parse_equality) 2464 else: 2465 expressions = None 2466 2467 return self.expression( 2468 exp.OnConflict, 2469 duplicate=duplicate, 2470 expressions=expressions, 2471 action=action, 2472 conflict_keys=conflict_keys, 2473 constraint=constraint, 2474 ) 2475 2476 def _parse_returning(self) -> t.Optional[exp.Returning]: 2477 if not self._match(TokenType.RETURNING): 2478 return None 2479 return self.expression( 2480 exp.Returning, 2481 expressions=self._parse_csv(self._parse_expression), 2482 into=self._match(TokenType.INTO) and self._parse_table_part(), 2483 ) 2484 2485 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2486 if not self._match(TokenType.FORMAT): 2487 return None 2488 return self._parse_row_format() 2489 2490 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2491 index = self._index 2492 with_ = with_ or self._match_text_seq("WITH") 2493 2494 if not self._match(TokenType.SERDE_PROPERTIES): 2495 self._retreat(index) 2496 return None 2497 return self.expression( 2498 exp.SerdeProperties, 2499 **{ # type: ignore 2500 "expressions": self._parse_wrapped_properties(), 2501 "with": with_, 2502 }, 2503 ) 2504 2505 def _parse_row_format( 2506 self, match_row: bool = False 2507 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2508 
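# -- Added illustrative example; not part of the original sqlglot source. --
# A minimal sketch of _parse_on_conflict (defined above); the "conflict" arg
# name is taken from _parse_insert. Assumes only the public sqlglot.parse_one
# API and a hypothetical table:
#
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> ins = sqlglot.parse_one(
# ...     "INSERT INTO t (x) VALUES (1) ON CONFLICT (x) DO NOTHING", read="postgres"
# ... )
# >>> isinstance(ins.args.get("conflict"), exp.OnConflict)
# True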
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2509 return None 2510 2511 if self._match_text_seq("SERDE"): 2512 this = self._parse_string() 2513 2514 serde_properties = self._parse_serde_properties() 2515 2516 return self.expression( 2517 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2518 ) 2519 2520 self._match_text_seq("DELIMITED") 2521 2522 kwargs = {} 2523 2524 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2525 kwargs["fields"] = self._parse_string() 2526 if self._match_text_seq("ESCAPED", "BY"): 2527 kwargs["escaped"] = self._parse_string() 2528 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2529 kwargs["collection_items"] = self._parse_string() 2530 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2531 kwargs["map_keys"] = self._parse_string() 2532 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2533 kwargs["lines"] = self._parse_string() 2534 if self._match_text_seq("NULL", "DEFINED", "AS"): 2535 kwargs["null"] = self._parse_string() 2536 2537 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2538 2539 def _parse_load(self) -> exp.LoadData | exp.Command: 2540 if self._match_text_seq("DATA"): 2541 local = self._match_text_seq("LOCAL") 2542 self._match_text_seq("INPATH") 2543 inpath = self._parse_string() 2544 overwrite = self._match(TokenType.OVERWRITE) 2545 self._match_pair(TokenType.INTO, TokenType.TABLE) 2546 2547 return self.expression( 2548 exp.LoadData, 2549 this=self._parse_table(schema=True), 2550 local=local, 2551 overwrite=overwrite, 2552 inpath=inpath, 2553 partition=self._parse_partition(), 2554 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2555 serde=self._match_text_seq("SERDE") and self._parse_string(), 2556 ) 2557 return self._parse_as_command(self._prev) 2558 2559 def _parse_delete(self) -> exp.Delete: 2560 # This handles MySQL's "Multiple-Table Syntax" 2561 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2562 tables = None 2563 comments = self._prev_comments 2564 if not self._match(TokenType.FROM, advance=False): 2565 tables = self._parse_csv(self._parse_table) or None 2566 2567 returning = self._parse_returning() 2568 2569 return self.expression( 2570 exp.Delete, 2571 comments=comments, 2572 tables=tables, 2573 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2574 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2575 where=self._parse_where(), 2576 returning=returning or self._parse_returning(), 2577 limit=self._parse_limit(), 2578 ) 2579 2580 def _parse_update(self) -> exp.Update: 2581 comments = self._prev_comments 2582 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2583 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2584 returning = self._parse_returning() 2585 return self.expression( 2586 exp.Update, 2587 comments=comments, 2588 **{ # type: ignore 2589 "this": this, 2590 "expressions": expressions, 2591 "from": self._parse_from(joins=True), 2592 "where": self._parse_where(), 2593 "returning": returning or self._parse_returning(), 2594 "order": self._parse_order(), 2595 "limit": self._parse_limit(), 2596 }, 2597 ) 2598 2599 def _parse_uncache(self) -> exp.Uncache: 2600 if not self._match(TokenType.TABLE): 2601 self.raise_error("Expecting TABLE after UNCACHE") 2602 2603 return self.expression( 2604 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2605 ) 2606 2607 def 
_parse_cache(self) -> exp.Cache: 2608 lazy = self._match_text_seq("LAZY") 2609 self._match(TokenType.TABLE) 2610 table = self._parse_table(schema=True) 2611 2612 options = [] 2613 if self._match_text_seq("OPTIONS"): 2614 self._match_l_paren() 2615 k = self._parse_string() 2616 self._match(TokenType.EQ) 2617 v = self._parse_string() 2618 options = [k, v] 2619 self._match_r_paren() 2620 2621 self._match(TokenType.ALIAS) 2622 return self.expression( 2623 exp.Cache, 2624 this=table, 2625 lazy=lazy, 2626 options=options, 2627 expression=self._parse_select(nested=True), 2628 ) 2629 2630 def _parse_partition(self) -> t.Optional[exp.Partition]: 2631 if not self._match(TokenType.PARTITION): 2632 return None 2633 2634 return self.expression( 2635 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2636 ) 2637 2638 def _parse_value(self) -> t.Optional[exp.Tuple]: 2639 if self._match(TokenType.L_PAREN): 2640 expressions = self._parse_csv(self._parse_expression) 2641 self._match_r_paren() 2642 return self.expression(exp.Tuple, expressions=expressions) 2643 2644 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2645 expression = self._parse_expression() 2646 if expression: 2647 return self.expression(exp.Tuple, expressions=[expression]) 2648 return None 2649 2650 def _parse_projections(self) -> t.List[exp.Expression]: 2651 return self._parse_expressions() 2652 2653 def _parse_select( 2654 self, 2655 nested: bool = False, 2656 table: bool = False, 2657 parse_subquery_alias: bool = True, 2658 parse_set_operation: bool = True, 2659 ) -> t.Optional[exp.Expression]: 2660 cte = self._parse_with() 2661 2662 if cte: 2663 this = self._parse_statement() 2664 2665 if not this: 2666 self.raise_error("Failed to parse any statement following CTE") 2667 return cte 2668 2669 if "with" in this.arg_types: 2670 this.set("with", cte) 2671 else: 2672 self.raise_error(f"{this.key} does not support CTE") 2673 this = cte 2674 2675 return this 2676 2677 # duckdb supports leading with FROM x 2678 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2679 2680 if self._match(TokenType.SELECT): 2681 comments = self._prev_comments 2682 2683 hint = self._parse_hint() 2684 all_ = self._match(TokenType.ALL) 2685 distinct = self._match_set(self.DISTINCT_TOKENS) 2686 2687 kind = ( 2688 self._match(TokenType.ALIAS) 2689 and self._match_texts(("STRUCT", "VALUE")) 2690 and self._prev.text.upper() 2691 ) 2692 2693 if distinct: 2694 distinct = self.expression( 2695 exp.Distinct, 2696 on=self._parse_value() if self._match(TokenType.ON) else None, 2697 ) 2698 2699 if all_ and distinct: 2700 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2701 2702 limit = self._parse_limit(top=True) 2703 projections = self._parse_projections() 2704 2705 this = self.expression( 2706 exp.Select, 2707 kind=kind, 2708 hint=hint, 2709 distinct=distinct, 2710 expressions=projections, 2711 limit=limit, 2712 ) 2713 this.comments = comments 2714 2715 into = self._parse_into() 2716 if into: 2717 this.set("into", into) 2718 2719 if not from_: 2720 from_ = self._parse_from() 2721 2722 if from_: 2723 this.set("from", from_) 2724 2725 this = self._parse_query_modifiers(this) 2726 elif (table or nested) and self._match(TokenType.L_PAREN): 2727 if self._match(TokenType.PIVOT): 2728 this = self._parse_simplified_pivot() 2729 elif self._match(TokenType.FROM): 2730 this = exp.select("*").from_( 2731 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2732 ) 2733 else: 2734 
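# -- Added illustrative example; not part of the original sqlglot source. --
# The leading-FROM handling earlier in this method is what lets DuckDB's
# FROM-first syntax parse into a regular SELECT. A minimal sketch, assuming
# the public API and a hypothetical table name:
#
# >>> import sqlglot
# >>> sqlglot.parse_one("FROM tbl", read="duckdb").sql()
# 'SELECT * FROM tbl'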
this = ( 2735 self._parse_table() 2736 if table 2737 else self._parse_select(nested=True, parse_set_operation=False) 2738 ) 2739 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2740 2741 self._match_r_paren() 2742 2743 # We return early here so that the UNION isn't attached to the subquery by the 2744 # following call to _parse_set_operations, but instead becomes the parent node 2745 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2746 elif self._match(TokenType.VALUES, advance=False): 2747 this = self._parse_derived_table_values() 2748 elif from_: 2749 this = exp.select("*").from_(from_.this, copy=False) 2750 else: 2751 this = None 2752 2753 if parse_set_operation: 2754 return self._parse_set_operations(this) 2755 return this 2756 2757 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2758 if not skip_with_token and not self._match(TokenType.WITH): 2759 return None 2760 2761 comments = self._prev_comments 2762 recursive = self._match(TokenType.RECURSIVE) 2763 2764 expressions = [] 2765 while True: 2766 expressions.append(self._parse_cte()) 2767 2768 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2769 break 2770 else: 2771 self._match(TokenType.WITH) 2772 2773 return self.expression( 2774 exp.With, comments=comments, expressions=expressions, recursive=recursive 2775 ) 2776 2777 def _parse_cte(self) -> exp.CTE: 2778 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2779 if not alias or not alias.this: 2780 self.raise_error("Expected CTE to have alias") 2781 2782 self._match(TokenType.ALIAS) 2783 2784 if self._match_text_seq("NOT", "MATERIALIZED"): 2785 materialized = False 2786 elif self._match_text_seq("MATERIALIZED"): 2787 materialized = True 2788 else: 2789 materialized = None 2790 2791 return self.expression( 2792 exp.CTE, 2793 this=self._parse_wrapped(self._parse_statement), 2794 alias=alias, 2795 materialized=materialized, 2796 ) 2797 2798 def _parse_table_alias( 2799 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2800 ) -> t.Optional[exp.TableAlias]: 2801 any_token = self._match(TokenType.ALIAS) 2802 alias = ( 2803 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2804 or self._parse_string_as_identifier() 2805 ) 2806 2807 index = self._index 2808 if self._match(TokenType.L_PAREN): 2809 columns = self._parse_csv(self._parse_function_parameter) 2810 self._match_r_paren() if columns else self._retreat(index) 2811 else: 2812 columns = None 2813 2814 if not alias and not columns: 2815 return None 2816 2817 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2818 2819 # We bubble up comments from the Identifier to the TableAlias 2820 if isinstance(alias, exp.Identifier): 2821 table_alias.add_comments(alias.pop_comments()) 2822 2823 return table_alias 2824 2825 def _parse_subquery( 2826 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2827 ) -> t.Optional[exp.Subquery]: 2828 if not this: 2829 return None 2830 2831 return self.expression( 2832 exp.Subquery, 2833 this=this, 2834 pivots=self._parse_pivots(), 2835 alias=self._parse_table_alias() if parse_alias else None, 2836 ) 2837 2838 def _implicit_unnests_to_explicit(self, this: E) -> E: 2839 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2840 2841 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2842 for i, join in enumerate(this.args.get("joins") or []): 2843 table = join.this 2844 
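# -- Added illustrative example; not part of the original sqlglot source. --
# A sketch of what this rewrite achieves for dialects with
# SUPPORTS_IMPLICIT_UNNEST (e.g. bigquery): a comma join on a dotted reference
# to a preceding table becomes an explicit UNNEST. Names are hypothetical and
# the exact output may vary by sqlglot version:
#
# >>> import sqlglot
# >>> sqlglot.parse_one("SELECT * FROM t, t.items", read="bigquery").sql("bigquery")
# 'SELECT * FROM t, UNNEST(t.items)'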
normalized_table = table.copy() 2845 normalized_table.meta["maybe_column"] = True 2846 normalized_table = _norm(normalized_table, dialect=self.dialect) 2847 2848 if isinstance(table, exp.Table) and not join.args.get("on"): 2849 if normalized_table.parts[0].name in refs: 2850 table_as_column = table.to_column() 2851 unnest = exp.Unnest(expressions=[table_as_column]) 2852 2853 # Table.to_column creates a parent Alias node that we want to convert to 2854 # a TableAlias and attach to the Unnest, so it matches the parser's output 2855 if isinstance(table.args.get("alias"), exp.TableAlias): 2856 table_as_column.replace(table_as_column.this) 2857 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2858 2859 table.replace(unnest) 2860 2861 refs.add(normalized_table.alias_or_name) 2862 2863 return this 2864 2865 def _parse_query_modifiers( 2866 self, this: t.Optional[exp.Expression] 2867 ) -> t.Optional[exp.Expression]: 2868 if isinstance(this, (exp.Query, exp.Table)): 2869 for join in self._parse_joins(): 2870 this.append("joins", join) 2871 for lateral in iter(self._parse_lateral, None): 2872 this.append("laterals", lateral) 2873 2874 while True: 2875 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2876 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2877 key, expression = parser(self) 2878 2879 if expression: 2880 this.set(key, expression) 2881 if key == "limit": 2882 offset = expression.args.pop("offset", None) 2883 2884 if offset: 2885 offset = exp.Offset(expression=offset) 2886 this.set("offset", offset) 2887 2888 limit_by_expressions = expression.expressions 2889 expression.set("expressions", None) 2890 offset.set("expressions", limit_by_expressions) 2891 continue 2892 break 2893 2894 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2895 this = self._implicit_unnests_to_explicit(this) 2896 2897 return this 2898 2899 def _parse_hint(self) -> t.Optional[exp.Hint]: 2900 if self._match(TokenType.HINT): 2901 hints = [] 2902 for hint in iter( 2903 lambda: self._parse_csv( 2904 lambda: self._parse_function() or self._parse_var(upper=True) 2905 ), 2906 [], 2907 ): 2908 hints.extend(hint) 2909 2910 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2911 self.raise_error("Expected */ after HINT") 2912 2913 return self.expression(exp.Hint, expressions=hints) 2914 2915 return None 2916 2917 def _parse_into(self) -> t.Optional[exp.Into]: 2918 if not self._match(TokenType.INTO): 2919 return None 2920 2921 temp = self._match(TokenType.TEMPORARY) 2922 unlogged = self._match_text_seq("UNLOGGED") 2923 self._match(TokenType.TABLE) 2924 2925 return self.expression( 2926 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2927 ) 2928 2929 def _parse_from( 2930 self, joins: bool = False, skip_from_token: bool = False 2931 ) -> t.Optional[exp.From]: 2932 if not skip_from_token and not self._match(TokenType.FROM): 2933 return None 2934 2935 return self.expression( 2936 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2937 ) 2938 2939 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2940 return self.expression( 2941 exp.MatchRecognizeMeasure, 2942 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2943 this=self._parse_expression(), 2944 ) 2945 2946 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2947 if not self._match(TokenType.MATCH_RECOGNIZE): 2948 return None 2949 2950 self._match_l_paren() 2951 2952 partition = 
self._parse_partition_by() 2953 order = self._parse_order() 2954 2955 measures = ( 2956 self._parse_csv(self._parse_match_recognize_measure) 2957 if self._match_text_seq("MEASURES") 2958 else None 2959 ) 2960 2961 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2962 rows = exp.var("ONE ROW PER MATCH") 2963 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2964 text = "ALL ROWS PER MATCH" 2965 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2966 text += " SHOW EMPTY MATCHES" 2967 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2968 text += " OMIT EMPTY MATCHES" 2969 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2970 text += " WITH UNMATCHED ROWS" 2971 rows = exp.var(text) 2972 else: 2973 rows = None 2974 2975 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2976 text = "AFTER MATCH SKIP" 2977 if self._match_text_seq("PAST", "LAST", "ROW"): 2978 text += " PAST LAST ROW" 2979 elif self._match_text_seq("TO", "NEXT", "ROW"): 2980 text += " TO NEXT ROW" 2981 elif self._match_text_seq("TO", "FIRST"): 2982 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2983 elif self._match_text_seq("TO", "LAST"): 2984 text += f" TO LAST {self._advance_any().text}" # type: ignore 2985 after = exp.var(text) 2986 else: 2987 after = None 2988 2989 if self._match_text_seq("PATTERN"): 2990 self._match_l_paren() 2991 2992 if not self._curr: 2993 self.raise_error("Expecting )", self._curr) 2994 2995 paren = 1 2996 start = self._curr 2997 2998 while self._curr and paren > 0: 2999 if self._curr.token_type == TokenType.L_PAREN: 3000 paren += 1 3001 if self._curr.token_type == TokenType.R_PAREN: 3002 paren -= 1 3003 3004 end = self._prev 3005 self._advance() 3006 3007 if paren > 0: 3008 self.raise_error("Expecting )", self._curr) 3009 3010 pattern = exp.var(self._find_sql(start, end)) 3011 else: 3012 pattern = None 3013 3014 define = ( 3015 self._parse_csv(self._parse_name_as_expression) 3016 if self._match_text_seq("DEFINE") 3017 else None 3018 ) 3019 3020 self._match_r_paren() 3021 3022 return self.expression( 3023 exp.MatchRecognize, 3024 partition_by=partition, 3025 order=order, 3026 measures=measures, 3027 rows=rows, 3028 after=after, 3029 pattern=pattern, 3030 define=define, 3031 alias=self._parse_table_alias(), 3032 ) 3033 3034 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3035 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3036 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3037 cross_apply = False 3038 3039 if cross_apply is not None: 3040 this = self._parse_select(table=True) 3041 view = None 3042 outer = None 3043 elif self._match(TokenType.LATERAL): 3044 this = self._parse_select(table=True) 3045 view = self._match(TokenType.VIEW) 3046 outer = self._match(TokenType.OUTER) 3047 else: 3048 return None 3049 3050 if not this: 3051 this = ( 3052 self._parse_unnest() 3053 or self._parse_function() 3054 or self._parse_id_var(any_token=False) 3055 ) 3056 3057 while self._match(TokenType.DOT): 3058 this = exp.Dot( 3059 this=this, 3060 expression=self._parse_function() or self._parse_id_var(any_token=False), 3061 ) 3062 3063 if view: 3064 table = self._parse_id_var(any_token=False) 3065 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3066 table_alias: t.Optional[exp.TableAlias] = self.expression( 3067 exp.TableAlias, this=table, columns=columns 3068 ) 3069 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3070 # We move the alias from the lateral's child node to 
the lateral itself 3071 table_alias = this.args["alias"].pop() 3072 else: 3073 table_alias = self._parse_table_alias() 3074 3075 return self.expression( 3076 exp.Lateral, 3077 this=this, 3078 view=view, 3079 outer=outer, 3080 alias=table_alias, 3081 cross_apply=cross_apply, 3082 ) 3083 3084 def _parse_join_parts( 3085 self, 3086 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3087 return ( 3088 self._match_set(self.JOIN_METHODS) and self._prev, 3089 self._match_set(self.JOIN_SIDES) and self._prev, 3090 self._match_set(self.JOIN_KINDS) and self._prev, 3091 ) 3092 3093 def _parse_join( 3094 self, skip_join_token: bool = False, parse_bracket: bool = False 3095 ) -> t.Optional[exp.Join]: 3096 if self._match(TokenType.COMMA): 3097 return self.expression(exp.Join, this=self._parse_table()) 3098 3099 index = self._index 3100 method, side, kind = self._parse_join_parts() 3101 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3102 join = self._match(TokenType.JOIN) 3103 3104 if not skip_join_token and not join: 3105 self._retreat(index) 3106 kind = None 3107 method = None 3108 side = None 3109 3110 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3111 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3112 3113 if not skip_join_token and not join and not outer_apply and not cross_apply: 3114 return None 3115 3116 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3117 3118 if method: 3119 kwargs["method"] = method.text 3120 if side: 3121 kwargs["side"] = side.text 3122 if kind: 3123 kwargs["kind"] = kind.text 3124 if hint: 3125 kwargs["hint"] = hint 3126 3127 if self._match(TokenType.MATCH_CONDITION): 3128 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3129 3130 if self._match(TokenType.ON): 3131 kwargs["on"] = self._parse_assignment() 3132 elif self._match(TokenType.USING): 3133 kwargs["using"] = self._parse_wrapped_id_vars() 3134 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3135 kind and kind.token_type == TokenType.CROSS 3136 ): 3137 index = self._index 3138 joins: t.Optional[list] = list(self._parse_joins()) 3139 3140 if joins and self._match(TokenType.ON): 3141 kwargs["on"] = self._parse_assignment() 3142 elif joins and self._match(TokenType.USING): 3143 kwargs["using"] = self._parse_wrapped_id_vars() 3144 else: 3145 joins = None 3146 self._retreat(index) 3147 3148 kwargs["this"].set("joins", joins if joins else None) 3149 3150 comments = [c for token in (method, side, kind) if token for c in token.comments] 3151 return self.expression(exp.Join, comments=comments, **kwargs) 3152 3153 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3154 this = self._parse_assignment() 3155 3156 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3157 return this 3158 3159 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3160 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3161 3162 return this 3163 3164 def _parse_index_params(self) -> exp.IndexParameters: 3165 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3166 3167 if self._match(TokenType.L_PAREN, advance=False): 3168 columns = self._parse_wrapped_csv(self._parse_with_operator) 3169 else: 3170 columns = None 3171 3172 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3173 partition_by = self._parse_partition_by() 3174 with_storage = 
self._match(TokenType.WITH) and self._parse_wrapped_properties() 3175 tablespace = ( 3176 self._parse_var(any_token=True) 3177 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3178 else None 3179 ) 3180 where = self._parse_where() 3181 3182 return self.expression( 3183 exp.IndexParameters, 3184 using=using, 3185 columns=columns, 3186 include=include, 3187 partition_by=partition_by, 3188 where=where, 3189 with_storage=with_storage, 3190 tablespace=tablespace, 3191 ) 3192 3193 def _parse_index( 3194 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3195 ) -> t.Optional[exp.Index]: 3196 if index or anonymous: 3197 unique = None 3198 primary = None 3199 amp = None 3200 3201 self._match(TokenType.ON) 3202 self._match(TokenType.TABLE) # hive 3203 table = self._parse_table_parts(schema=True) 3204 else: 3205 unique = self._match(TokenType.UNIQUE) 3206 primary = self._match_text_seq("PRIMARY") 3207 amp = self._match_text_seq("AMP") 3208 3209 if not self._match(TokenType.INDEX): 3210 return None 3211 3212 index = self._parse_id_var() 3213 table = None 3214 3215 params = self._parse_index_params() 3216 3217 return self.expression( 3218 exp.Index, 3219 this=index, 3220 table=table, 3221 unique=unique, 3222 primary=primary, 3223 amp=amp, 3224 params=params, 3225 ) 3226 3227 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3228 hints: t.List[exp.Expression] = [] 3229 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3230 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3231 hints.append( 3232 self.expression( 3233 exp.WithTableHint, 3234 expressions=self._parse_csv( 3235 lambda: self._parse_function() or self._parse_var(any_token=True) 3236 ), 3237 ) 3238 ) 3239 self._match_r_paren() 3240 else: 3241 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3242 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3243 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3244 3245 self._match_texts(("INDEX", "KEY")) 3246 if self._match(TokenType.FOR): 3247 hint.set("target", self._advance_any() and self._prev.text.upper()) 3248 3249 hint.set("expressions", self._parse_wrapped_id_vars()) 3250 hints.append(hint) 3251 3252 return hints or None 3253 3254 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3255 return ( 3256 (not schema and self._parse_function(optional_parens=False)) 3257 or self._parse_id_var(any_token=False) 3258 or self._parse_string_as_identifier() 3259 or self._parse_placeholder() 3260 ) 3261 3262 def _parse_table_parts( 3263 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3264 ) -> exp.Table: 3265 catalog = None 3266 db = None 3267 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3268 3269 while self._match(TokenType.DOT): 3270 if catalog: 3271 # This allows nesting the table in arbitrarily many dot expressions if needed 3272 table = self.expression( 3273 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3274 ) 3275 else: 3276 catalog = db 3277 db = table 3278 # "" used for tsql FROM a..b case 3279 table = self._parse_table_part(schema=schema) or "" 3280 3281 if ( 3282 wildcard 3283 and self._is_connected() 3284 and (isinstance(table, exp.Identifier) or not table) 3285 and self._match(TokenType.STAR) 3286 ): 3287 if isinstance(table, exp.Identifier): 3288 table.args["this"] += "*" 3289 else: 3290 table = exp.Identifier(this="*") 3291 3292 # We bubble up comments 
from the Identifier to the Table 3293 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3294 3295 if is_db_reference: 3296 catalog = db 3297 db = table 3298 table = None 3299 3300 if not table and not is_db_reference: 3301 self.raise_error(f"Expected table name but got {self._curr}") 3302 if not db and is_db_reference: 3303 self.raise_error(f"Expected database name but got {self._curr}") 3304 3305 return self.expression( 3306 exp.Table, 3307 comments=comments, 3308 this=table, 3309 db=db, 3310 catalog=catalog, 3311 pivots=self._parse_pivots(), 3312 ) 3313 3314 def _parse_table( 3315 self, 3316 schema: bool = False, 3317 joins: bool = False, 3318 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3319 parse_bracket: bool = False, 3320 is_db_reference: bool = False, 3321 parse_partition: bool = False, 3322 ) -> t.Optional[exp.Expression]: 3323 lateral = self._parse_lateral() 3324 if lateral: 3325 return lateral 3326 3327 unnest = self._parse_unnest() 3328 if unnest: 3329 return unnest 3330 3331 values = self._parse_derived_table_values() 3332 if values: 3333 return values 3334 3335 subquery = self._parse_select(table=True) 3336 if subquery: 3337 if not subquery.args.get("pivots"): 3338 subquery.set("pivots", self._parse_pivots()) 3339 return subquery 3340 3341 bracket = parse_bracket and self._parse_bracket(None) 3342 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3343 3344 only = self._match(TokenType.ONLY) 3345 3346 this = t.cast( 3347 exp.Expression, 3348 bracket 3349 or self._parse_bracket( 3350 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3351 ), 3352 ) 3353 3354 if only: 3355 this.set("only", only) 3356 3357 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3358 self._match_text_seq("*") 3359 3360 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3361 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3362 this.set("partition", self._parse_partition()) 3363 3364 if schema: 3365 return self._parse_schema(this=this) 3366 3367 version = self._parse_version() 3368 3369 if version: 3370 this.set("version", version) 3371 3372 if self.dialect.ALIAS_POST_TABLESAMPLE: 3373 table_sample = self._parse_table_sample() 3374 3375 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3376 if alias: 3377 this.set("alias", alias) 3378 3379 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3380 return self.expression( 3381 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3382 ) 3383 3384 this.set("hints", self._parse_table_hints()) 3385 3386 if not this.args.get("pivots"): 3387 this.set("pivots", self._parse_pivots()) 3388 3389 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3390 table_sample = self._parse_table_sample() 3391 3392 if table_sample: 3393 table_sample.set("this", this) 3394 this = table_sample 3395 3396 if joins: 3397 for join in self._parse_joins(): 3398 this.append("joins", join) 3399 3400 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3401 this.set("ordinality", True) 3402 this.set("alias", self._parse_table_alias()) 3403 3404 return this 3405 3406 def _parse_version(self) -> t.Optional[exp.Version]: 3407 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3408 this = "TIMESTAMP" 3409 elif self._match(TokenType.VERSION_SNAPSHOT): 3410 this = "VERSION" 3411 else: 3412 return None 3413 3414 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 
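# -- Added illustrative example; not part of the original sqlglot source. --
# A minimal sketch of how _parse_table_parts (above) splits a dotted name into
# catalog, db and table. Assumes only the public API; names are hypothetical:
#
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> tbl = sqlglot.parse_one("SELECT * FROM c1.d1.t1").find(exp.Table)
# >>> (tbl.catalog, tbl.db, tbl.name)
# ('c1', 'd1', 't1')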
3415 kind = self._prev.text.upper() 3416 start = self._parse_bitwise() 3417 self._match_texts(("TO", "AND")) 3418 end = self._parse_bitwise() 3419 expression: t.Optional[exp.Expression] = self.expression( 3420 exp.Tuple, expressions=[start, end] 3421 ) 3422 elif self._match_text_seq("CONTAINED", "IN"): 3423 kind = "CONTAINED IN" 3424 expression = self.expression( 3425 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3426 ) 3427 elif self._match(TokenType.ALL): 3428 kind = "ALL" 3429 expression = None 3430 else: 3431 self._match_text_seq("AS", "OF") 3432 kind = "AS OF" 3433 expression = self._parse_type() 3434 3435 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3436 3437 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3438 if not self._match(TokenType.UNNEST): 3439 return None 3440 3441 expressions = self._parse_wrapped_csv(self._parse_equality) 3442 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3443 3444 alias = self._parse_table_alias() if with_alias else None 3445 3446 if alias: 3447 if self.dialect.UNNEST_COLUMN_ONLY: 3448 if alias.args.get("columns"): 3449 self.raise_error("Unexpected extra column alias in unnest.") 3450 3451 alias.set("columns", [alias.this]) 3452 alias.set("this", None) 3453 3454 columns = alias.args.get("columns") or [] 3455 if offset and len(expressions) < len(columns): 3456 offset = columns.pop() 3457 3458 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3459 self._match(TokenType.ALIAS) 3460 offset = self._parse_id_var( 3461 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3462 ) or exp.to_identifier("offset") 3463 3464 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3465 3466 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3467 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3468 if not is_derived and not self._match_text_seq("VALUES"): 3469 return None 3470 3471 expressions = self._parse_csv(self._parse_value) 3472 alias = self._parse_table_alias() 3473 3474 if is_derived: 3475 self._match_r_paren() 3476 3477 return self.expression( 3478 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3479 ) 3480 3481 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3482 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3483 as_modifier and self._match_text_seq("USING", "SAMPLE") 3484 ): 3485 return None 3486 3487 bucket_numerator = None 3488 bucket_denominator = None 3489 bucket_field = None 3490 percent = None 3491 size = None 3492 seed = None 3493 3494 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3495 matched_l_paren = self._match(TokenType.L_PAREN) 3496 3497 if self.TABLESAMPLE_CSV: 3498 num = None 3499 expressions = self._parse_csv(self._parse_primary) 3500 else: 3501 expressions = None 3502 num = ( 3503 self._parse_factor() 3504 if self._match(TokenType.NUMBER, advance=False) 3505 else self._parse_primary() or self._parse_placeholder() 3506 ) 3507 3508 if self._match_text_seq("BUCKET"): 3509 bucket_numerator = self._parse_number() 3510 self._match_text_seq("OUT", "OF") 3511 bucket_denominator = self._parse_number() 3512 self._match(TokenType.ON) 3513 bucket_field = self._parse_field() 3514 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3515 percent = num 3516 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3517 size = 
num 3518 else: 3519 percent = num 3520 3521 if matched_l_paren: 3522 self._match_r_paren() 3523 3524 if self._match(TokenType.L_PAREN): 3525 method = self._parse_var(upper=True) 3526 seed = self._match(TokenType.COMMA) and self._parse_number() 3527 self._match_r_paren() 3528 elif self._match_texts(("SEED", "REPEATABLE")): 3529 seed = self._parse_wrapped(self._parse_number) 3530 3531 if not method and self.DEFAULT_SAMPLING_METHOD: 3532 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3533 3534 return self.expression( 3535 exp.TableSample, 3536 expressions=expressions, 3537 method=method, 3538 bucket_numerator=bucket_numerator, 3539 bucket_denominator=bucket_denominator, 3540 bucket_field=bucket_field, 3541 percent=percent, 3542 size=size, 3543 seed=seed, 3544 ) 3545 3546 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3547 return list(iter(self._parse_pivot, None)) or None 3548 3549 def _parse_joins(self) -> t.Iterator[exp.Join]: 3550 return iter(self._parse_join, None) 3551 3552 # https://duckdb.org/docs/sql/statements/pivot 3553 def _parse_simplified_pivot(self) -> exp.Pivot: 3554 def _parse_on() -> t.Optional[exp.Expression]: 3555 this = self._parse_bitwise() 3556 return self._parse_in(this) if self._match(TokenType.IN) else this 3557 3558 this = self._parse_table() 3559 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3560 using = self._match(TokenType.USING) and self._parse_csv( 3561 lambda: self._parse_alias(self._parse_function()) 3562 ) 3563 group = self._parse_group() 3564 return self.expression( 3565 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3566 ) 3567 3568 def _parse_pivot_in(self) -> exp.In: 3569 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3570 this = self._parse_assignment() 3571 3572 self._match(TokenType.ALIAS) 3573 alias = self._parse_field() 3574 if alias: 3575 return self.expression(exp.PivotAlias, this=this, alias=alias) 3576 3577 return this 3578 3579 value = self._parse_column() 3580 3581 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3582 self.raise_error("Expecting IN (") 3583 3584 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3585 3586 self._match_r_paren() 3587 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3588 3589 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3590 index = self._index 3591 include_nulls = None 3592 3593 if self._match(TokenType.PIVOT): 3594 unpivot = False 3595 elif self._match(TokenType.UNPIVOT): 3596 unpivot = True 3597 3598 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3599 if self._match_text_seq("INCLUDE", "NULLS"): 3600 include_nulls = True 3601 elif self._match_text_seq("EXCLUDE", "NULLS"): 3602 include_nulls = False 3603 else: 3604 return None 3605 3606 expressions = [] 3607 3608 if not self._match(TokenType.L_PAREN): 3609 self._retreat(index) 3610 return None 3611 3612 if unpivot: 3613 expressions = self._parse_csv(self._parse_column) 3614 else: 3615 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3616 3617 if not expressions: 3618 self.raise_error("Failed to parse PIVOT's aggregation list") 3619 3620 if not self._match(TokenType.FOR): 3621 self.raise_error("Expecting FOR") 3622 3623 field = self._parse_pivot_in() 3624 3625 self._match_r_paren() 3626 3627 pivot = self.expression( 3628 exp.Pivot, 3629 expressions=expressions, 3630 field=field, 3631 unpivot=unpivot, 3632 include_nulls=include_nulls, 3633 ) 3634 3635 
if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3636 pivot.set("alias", self._parse_table_alias()) 3637 3638 if not unpivot: 3639 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3640 3641 columns: t.List[exp.Expression] = [] 3642 for fld in pivot.args["field"].expressions: 3643 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3644 for name in names: 3645 if self.PREFIXED_PIVOT_COLUMNS: 3646 name = f"{name}_{field_name}" if name else field_name 3647 else: 3648 name = f"{field_name}_{name}" if name else field_name 3649 3650 columns.append(exp.to_identifier(name)) 3651 3652 pivot.set("columns", columns) 3653 3654 return pivot 3655 3656 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3657 return [agg.alias for agg in aggregations] 3658 3659 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3660 if not skip_where_token and not self._match(TokenType.PREWHERE): 3661 return None 3662 3663 return self.expression( 3664 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3665 ) 3666 3667 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3668 if not skip_where_token and not self._match(TokenType.WHERE): 3669 return None 3670 3671 return self.expression( 3672 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3673 ) 3674 3675 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3676 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3677 return None 3678 3679 elements: t.Dict[str, t.Any] = defaultdict(list) 3680 3681 if self._match(TokenType.ALL): 3682 elements["all"] = True 3683 elif self._match(TokenType.DISTINCT): 3684 elements["all"] = False 3685 3686 while True: 3687 expressions = self._parse_csv( 3688 lambda: None 3689 if self._match(TokenType.ROLLUP, advance=False) 3690 else self._parse_assignment() 3691 ) 3692 if expressions: 3693 elements["expressions"].extend(expressions) 3694 3695 grouping_sets = self._parse_grouping_sets() 3696 if grouping_sets: 3697 elements["grouping_sets"].extend(grouping_sets) 3698 3699 rollup = None 3700 cube = None 3701 totals = None 3702 3703 index = self._index 3704 with_ = self._match(TokenType.WITH) 3705 if self._match(TokenType.ROLLUP): 3706 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3707 elements["rollup"].extend(ensure_list(rollup)) 3708 3709 if self._match(TokenType.CUBE): 3710 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3711 elements["cube"].extend(ensure_list(cube)) 3712 3713 if self._match_text_seq("TOTALS"): 3714 totals = True 3715 elements["totals"] = True # type: ignore 3716 3717 if not (grouping_sets or rollup or cube or totals): 3718 if with_: 3719 self._retreat(index) 3720 break 3721 3722 return self.expression(exp.Group, **elements) # type: ignore 3723 3724 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3725 if not self._match(TokenType.GROUPING_SETS): 3726 return None 3727 3728 return self._parse_wrapped_csv(self._parse_grouping_set) 3729 3730 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3731 if self._match(TokenType.L_PAREN): 3732 grouping_set = self._parse_csv(self._parse_column) 3733 self._match_r_paren() 3734 return self.expression(exp.Tuple, expressions=grouping_set) 3735 3736 return self._parse_column() 3737 3738 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 
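# Illustrative example: a minimal sketch of the clauses handled by
# _parse_group above and _parse_having below, as they appear on a parsed
# SELECT. It assumes only the public sqlglot.parse_one API; the table and
# column names are placeholders.
#
#     >>> import sqlglot
#     >>> ast = sqlglot.parse_one(
#     ...     "SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a) HAVING SUM(b) > 10"
#     ... )
#     >>> bool(ast.args.get("group")), bool(ast.args.get("having"))
#     (True, True)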
3739 if not skip_having_token and not self._match(TokenType.HAVING): 3740 return None 3741 return self.expression(exp.Having, this=self._parse_assignment()) 3742 3743 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3744 if not self._match(TokenType.QUALIFY): 3745 return None 3746 return self.expression(exp.Qualify, this=self._parse_assignment()) 3747 3748 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3749 if skip_start_token: 3750 start = None 3751 elif self._match(TokenType.START_WITH): 3752 start = self._parse_assignment() 3753 else: 3754 return None 3755 3756 self._match(TokenType.CONNECT_BY) 3757 nocycle = self._match_text_seq("NOCYCLE") 3758 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3759 exp.Prior, this=self._parse_bitwise() 3760 ) 3761 connect = self._parse_assignment() 3762 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3763 3764 if not start and self._match(TokenType.START_WITH): 3765 start = self._parse_assignment() 3766 3767 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3768 3769 def _parse_name_as_expression(self) -> exp.Alias: 3770 return self.expression( 3771 exp.Alias, 3772 alias=self._parse_id_var(any_token=True), 3773 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3774 ) 3775 3776 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3777 if self._match_text_seq("INTERPOLATE"): 3778 return self._parse_wrapped_csv(self._parse_name_as_expression) 3779 return None 3780 3781 def _parse_order( 3782 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3783 ) -> t.Optional[exp.Expression]: 3784 siblings = None 3785 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3786 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3787 return this 3788 3789 siblings = True 3790 3791 return self.expression( 3792 exp.Order, 3793 this=this, 3794 expressions=self._parse_csv(self._parse_ordered), 3795 interpolate=self._parse_interpolate(), 3796 siblings=siblings, 3797 ) 3798 3799 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3800 if not self._match(token): 3801 return None 3802 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3803 3804 def _parse_ordered( 3805 self, parse_method: t.Optional[t.Callable] = None 3806 ) -> t.Optional[exp.Ordered]: 3807 this = parse_method() if parse_method else self._parse_assignment() 3808 if not this: 3809 return None 3810 3811 asc = self._match(TokenType.ASC) 3812 desc = self._match(TokenType.DESC) or (asc and False) 3813 3814 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3815 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3816 3817 nulls_first = is_nulls_first or False 3818 explicitly_null_ordered = is_nulls_first or is_nulls_last 3819 3820 if ( 3821 not explicitly_null_ordered 3822 and ( 3823 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3824 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3825 ) 3826 and self.dialect.NULL_ORDERING != "nulls_are_last" 3827 ): 3828 nulls_first = True 3829 3830 if self._match_text_seq("WITH", "FILL"): 3831 with_fill = self.expression( 3832 exp.WithFill, 3833 **{ # type: ignore 3834 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3835 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3836 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3837 }, 3838 ) 3839 else: 3840 with_fill = None 3841 3842 return 
self.expression( 3843 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3844 ) 3845 3846 def _parse_limit( 3847 self, 3848 this: t.Optional[exp.Expression] = None, 3849 top: bool = False, 3850 skip_limit_token: bool = False, 3851 ) -> t.Optional[exp.Expression]: 3852 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3853 comments = self._prev_comments 3854 if top: 3855 limit_paren = self._match(TokenType.L_PAREN) 3856 expression = self._parse_term() if limit_paren else self._parse_number() 3857 3858 if limit_paren: 3859 self._match_r_paren() 3860 else: 3861 expression = self._parse_term() 3862 3863 if self._match(TokenType.COMMA): 3864 offset = expression 3865 expression = self._parse_term() 3866 else: 3867 offset = None 3868 3869 limit_exp = self.expression( 3870 exp.Limit, 3871 this=this, 3872 expression=expression, 3873 offset=offset, 3874 comments=comments, 3875 expressions=self._parse_limit_by(), 3876 ) 3877 3878 return limit_exp 3879 3880 if self._match(TokenType.FETCH): 3881 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3882 direction = self._prev.text.upper() if direction else "FIRST" 3883 3884 count = self._parse_field(tokens=self.FETCH_TOKENS) 3885 percent = self._match(TokenType.PERCENT) 3886 3887 self._match_set((TokenType.ROW, TokenType.ROWS)) 3888 3889 only = self._match_text_seq("ONLY") 3890 with_ties = self._match_text_seq("WITH", "TIES") 3891 3892 if only and with_ties: 3893 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3894 3895 return self.expression( 3896 exp.Fetch, 3897 direction=direction, 3898 count=count, 3899 percent=percent, 3900 with_ties=with_ties, 3901 ) 3902 3903 return this 3904 3905 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3906 if not self._match(TokenType.OFFSET): 3907 return this 3908 3909 count = self._parse_term() 3910 self._match_set((TokenType.ROW, TokenType.ROWS)) 3911 3912 return self.expression( 3913 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3914 ) 3915 3916 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3917 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3918 3919 def _parse_locks(self) -> t.List[exp.Lock]: 3920 locks = [] 3921 while True: 3922 if self._match_text_seq("FOR", "UPDATE"): 3923 update = True 3924 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3925 "LOCK", "IN", "SHARE", "MODE" 3926 ): 3927 update = False 3928 else: 3929 break 3930 3931 expressions = None 3932 if self._match_text_seq("OF"): 3933 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3934 3935 wait: t.Optional[bool | exp.Expression] = None 3936 if self._match_text_seq("NOWAIT"): 3937 wait = True 3938 elif self._match_text_seq("WAIT"): 3939 wait = self._parse_primary() 3940 elif self._match_text_seq("SKIP", "LOCKED"): 3941 wait = False 3942 3943 locks.append( 3944 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3945 ) 3946 3947 return locks 3948 3949 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3950 while this and self._match_set(self.SET_OPERATIONS): 3951 token_type = self._prev.token_type 3952 3953 if token_type == TokenType.UNION: 3954 operation = exp.Union 3955 elif token_type == TokenType.EXCEPT: 3956 operation = exp.Except 3957 else: 3958 operation = exp.Intersect 3959 3960 comments = self._prev.comments 3961 
distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3962 by_name = self._match_text_seq("BY", "NAME") 3963 expression = self._parse_select(nested=True, parse_set_operation=False) 3964 3965 this = self.expression( 3966 operation, 3967 comments=comments, 3968 this=this, 3969 distinct=distinct, 3970 by_name=by_name, 3971 expression=expression, 3972 ) 3973 3974 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3975 expression = this.expression 3976 3977 if expression: 3978 for arg in self.UNION_MODIFIERS: 3979 expr = expression.args.get(arg) 3980 if expr: 3981 this.set(arg, expr.pop()) 3982 3983 return this 3984 3985 def _parse_expression(self) -> t.Optional[exp.Expression]: 3986 return self._parse_alias(self._parse_assignment()) 3987 3988 def _parse_assignment(self) -> t.Optional[exp.Expression]: 3989 this = self._parse_disjunction() 3990 3991 while self._match_set(self.ASSIGNMENT): 3992 this = self.expression( 3993 self.ASSIGNMENT[self._prev.token_type], 3994 this=this, 3995 comments=self._prev_comments, 3996 expression=self._parse_assignment(), 3997 ) 3998 3999 return this 4000 4001 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4002 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4003 4004 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4005 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4006 4007 def _parse_equality(self) -> t.Optional[exp.Expression]: 4008 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4009 4010 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4011 return self._parse_tokens(self._parse_range, self.COMPARISON) 4012 4013 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4014 this = this or self._parse_bitwise() 4015 negate = self._match(TokenType.NOT) 4016 4017 if self._match_set(self.RANGE_PARSERS): 4018 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4019 if not expression: 4020 return this 4021 4022 this = expression 4023 elif self._match(TokenType.ISNULL): 4024 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4025 4026 # Postgres supports ISNULL and NOTNULL for conditions. 
4027 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4028 if self._match(TokenType.NOTNULL): 4029 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4030 this = self.expression(exp.Not, this=this) 4031 4032 if negate: 4033 this = self.expression(exp.Not, this=this) 4034 4035 if self._match(TokenType.IS): 4036 this = self._parse_is(this) 4037 4038 return this 4039 4040 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4041 index = self._index - 1 4042 negate = self._match(TokenType.NOT) 4043 4044 if self._match_text_seq("DISTINCT", "FROM"): 4045 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4046 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4047 4048 expression = self._parse_null() or self._parse_boolean() 4049 if not expression: 4050 self._retreat(index) 4051 return None 4052 4053 this = self.expression(exp.Is, this=this, expression=expression) 4054 return self.expression(exp.Not, this=this) if negate else this 4055 4056 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4057 unnest = self._parse_unnest(with_alias=False) 4058 if unnest: 4059 this = self.expression(exp.In, this=this, unnest=unnest) 4060 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4061 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4062 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4063 4064 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4065 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4066 else: 4067 this = self.expression(exp.In, this=this, expressions=expressions) 4068 4069 if matched_l_paren: 4070 self._match_r_paren(this) 4071 elif not self._match(TokenType.R_BRACKET, expression=this): 4072 self.raise_error("Expecting ]") 4073 else: 4074 this = self.expression(exp.In, this=this, field=self._parse_field()) 4075 4076 return this 4077 4078 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4079 low = self._parse_bitwise() 4080 self._match(TokenType.AND) 4081 high = self._parse_bitwise() 4082 return self.expression(exp.Between, this=this, low=low, high=high) 4083 4084 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4085 if not self._match(TokenType.ESCAPE): 4086 return this 4087 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4088 4089 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4090 index = self._index 4091 4092 if not self._match(TokenType.INTERVAL) and match_interval: 4093 return None 4094 4095 if self._match(TokenType.STRING, advance=False): 4096 this = self._parse_primary() 4097 else: 4098 this = self._parse_term() 4099 4100 if not this or ( 4101 isinstance(this, exp.Column) 4102 and not this.table 4103 and not this.this.quoted 4104 and this.name.upper() == "IS" 4105 ): 4106 self._retreat(index) 4107 return None 4108 4109 unit = self._parse_function() or ( 4110 not self._match(TokenType.ALIAS, advance=False) 4111 and self._parse_var(any_token=True, upper=True) 4112 ) 4113 4114 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4115 # each INTERVAL expression into this canonical form so it's easy to transpile 4116 if this and this.is_number: 4117 this = exp.Literal.string(this.name) 4118 elif this and this.is_string: 4119 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4120 if 
len(parts) == 1: 4121 if unit: 4122 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4123 self._retreat(self._index - 1) 4124 4125 this = exp.Literal.string(parts[0][0]) 4126 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4127 4128 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4129 unit = self.expression( 4130 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4131 ) 4132 4133 interval = self.expression(exp.Interval, this=this, unit=unit) 4134 4135 index = self._index 4136 self._match(TokenType.PLUS) 4137 4138 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4139 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4140 return self.expression( 4141 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4142 ) 4143 4144 self._retreat(index) 4145 return interval 4146 4147 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4148 this = self._parse_term() 4149 4150 while True: 4151 if self._match_set(self.BITWISE): 4152 this = self.expression( 4153 self.BITWISE[self._prev.token_type], 4154 this=this, 4155 expression=self._parse_term(), 4156 ) 4157 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4158 this = self.expression( 4159 exp.DPipe, 4160 this=this, 4161 expression=self._parse_term(), 4162 safe=not self.dialect.STRICT_STRING_CONCAT, 4163 ) 4164 elif self._match(TokenType.DQMARK): 4165 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4166 elif self._match_pair(TokenType.LT, TokenType.LT): 4167 this = self.expression( 4168 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4169 ) 4170 elif self._match_pair(TokenType.GT, TokenType.GT): 4171 this = self.expression( 4172 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4173 ) 4174 else: 4175 break 4176 4177 return this 4178 4179 def _parse_term(self) -> t.Optional[exp.Expression]: 4180 return self._parse_tokens(self._parse_factor, self.TERM) 4181 4182 def _parse_factor(self) -> t.Optional[exp.Expression]: 4183 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4184 this = parse_method() 4185 4186 while self._match_set(self.FACTOR): 4187 klass = self.FACTOR[self._prev.token_type] 4188 comments = self._prev_comments 4189 expression = parse_method() 4190 4191 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4192 self._retreat(self._index - 1) 4193 return this 4194 4195 this = self.expression(klass, this=this, comments=comments, expression=expression) 4196 4197 if isinstance(this, exp.Div): 4198 this.args["typed"] = self.dialect.TYPED_DIVISION 4199 this.args["safe"] = self.dialect.SAFE_DIVISION 4200 4201 return this 4202 4203 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4204 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4205 4206 def _parse_unary(self) -> t.Optional[exp.Expression]: 4207 if self._match_set(self.UNARY_PARSERS): 4208 return self.UNARY_PARSERS[self._prev.token_type](self) 4209 return self._parse_at_time_zone(self._parse_type()) 4210 4211 def _parse_type( 4212 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4213 ) -> t.Optional[exp.Expression]: 4214 interval = parse_interval and self._parse_interval() 4215 if interval: 4216 return interval 4217 4218 index = self._index 4219 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4220 4221 if data_type: 4222 index2 = 
self._index 4223 this = self._parse_primary() 4224 4225 if isinstance(this, exp.Literal): 4226 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4227 if parser: 4228 return parser(self, this, data_type) 4229 4230 return self.expression(exp.Cast, this=this, to=data_type) 4231 4232 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4233 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4234 # 4235 # If the index difference here is greater than 1, that means the parser itself must have 4236 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4237 # 4238 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4239 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4240 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4241 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4242 # 4243 # In these cases, we don't really want to return the converted type, but instead retreat 4244 # and try to parse a Column or Identifier in the section below. 4245 if data_type.expressions and index2 - index > 1: 4246 self._retreat(index2) 4247 return self._parse_column_ops(data_type) 4248 4249 self._retreat(index) 4250 4251 if fallback_to_identifier: 4252 return self._parse_id_var() 4253 4254 this = self._parse_column() 4255 return this and self._parse_column_ops(this) 4256 4257 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4258 this = self._parse_type() 4259 if not this: 4260 return None 4261 4262 if isinstance(this, exp.Column) and not this.table: 4263 this = exp.var(this.name.upper()) 4264 4265 return self.expression( 4266 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4267 ) 4268 4269 def _parse_types( 4270 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4271 ) -> t.Optional[exp.Expression]: 4272 index = self._index 4273 4274 this: t.Optional[exp.Expression] = None 4275 prefix = self._match_text_seq("SYSUDTLIB", ".") 4276 4277 if not self._match_set(self.TYPE_TOKENS): 4278 identifier = allow_identifiers and self._parse_id_var( 4279 any_token=False, tokens=(TokenType.VAR,) 4280 ) 4281 if identifier: 4282 tokens = self.dialect.tokenize(identifier.name) 4283 4284 if len(tokens) != 1: 4285 self.raise_error("Unexpected identifier", self._prev) 4286 4287 if tokens[0].token_type in self.TYPE_TOKENS: 4288 self._prev = tokens[0] 4289 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4290 type_name = identifier.name 4291 4292 while self._match(TokenType.DOT): 4293 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4294 4295 this = exp.DataType.build(type_name, udt=True) 4296 else: 4297 self._retreat(self._index - 1) 4298 return None 4299 else: 4300 return None 4301 4302 type_token = self._prev.token_type 4303 4304 if type_token == TokenType.PSEUDO_TYPE: 4305 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4306 4307 if type_token == TokenType.OBJECT_IDENTIFIER: 4308 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4309 4310 # https://materialize.com/docs/sql/types/map/ 4311 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4312 key_type = self._parse_types( 4313 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4314 ) 4315 if not self._match(TokenType.FARROW): 4316 self._retreat(index) 4317 return None 4318
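# Illustrative example: a minimal sketch of how a parameterized type such as
# the DECIMAL(38, 0) mentioned in the comment above ends up represented. It
# assumes only the public API (sqlglot.parse_one, Expression.find); x and t
# are placeholders and the printed output is approximate.
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> cast = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0)) FROM t").find(exp.Cast)
#     >>> cast.to.sql()
#     'DECIMAL(38, 0)'
#     >>> [param.name for param in cast.to.expressions]
#     ['38', '0']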
4319 value_type = self._parse_types( 4320 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4321 ) 4322 if not self._match(TokenType.R_BRACKET): 4323 self._retreat(index) 4324 return None 4325 4326 return exp.DataType( 4327 this=exp.DataType.Type.MAP, 4328 expressions=[key_type, value_type], 4329 nested=True, 4330 prefix=prefix, 4331 ) 4332 4333 nested = type_token in self.NESTED_TYPE_TOKENS 4334 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4335 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4336 expressions = None 4337 maybe_func = False 4338 4339 if self._match(TokenType.L_PAREN): 4340 if is_struct: 4341 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4342 elif nested: 4343 expressions = self._parse_csv( 4344 lambda: self._parse_types( 4345 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4346 ) 4347 ) 4348 elif type_token in self.ENUM_TYPE_TOKENS: 4349 expressions = self._parse_csv(self._parse_equality) 4350 elif is_aggregate: 4351 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4352 any_token=False, tokens=(TokenType.VAR,) 4353 ) 4354 if not func_or_ident or not self._match(TokenType.COMMA): 4355 return None 4356 expressions = self._parse_csv( 4357 lambda: self._parse_types( 4358 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4359 ) 4360 ) 4361 expressions.insert(0, func_or_ident) 4362 else: 4363 expressions = self._parse_csv(self._parse_type_size) 4364 4365 if not expressions or not self._match(TokenType.R_PAREN): 4366 self._retreat(index) 4367 return None 4368 4369 maybe_func = True 4370 4371 values: t.Optional[t.List[exp.Expression]] = None 4372 4373 if nested and self._match(TokenType.LT): 4374 if is_struct: 4375 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4376 else: 4377 expressions = self._parse_csv( 4378 lambda: self._parse_types( 4379 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4380 ) 4381 ) 4382 4383 if not self._match(TokenType.GT): 4384 self.raise_error("Expecting >") 4385 4386 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4387 values = self._parse_csv(self._parse_assignment) 4388 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4389 4390 if type_token in self.TIMESTAMPS: 4391 if self._match_text_seq("WITH", "TIME", "ZONE"): 4392 maybe_func = False 4393 tz_type = ( 4394 exp.DataType.Type.TIMETZ 4395 if type_token in self.TIMES 4396 else exp.DataType.Type.TIMESTAMPTZ 4397 ) 4398 this = exp.DataType(this=tz_type, expressions=expressions) 4399 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4400 maybe_func = False 4401 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4402 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4403 maybe_func = False 4404 elif type_token == TokenType.INTERVAL: 4405 unit = self._parse_var(upper=True) 4406 if unit: 4407 if self._match_text_seq("TO"): 4408 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4409 4410 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4411 else: 4412 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4413 4414 if maybe_func and check_func: 4415 index2 = self._index 4416 peek = self._parse_string() 4417 4418 if not peek: 4419 self._retreat(index) 4420 return None 4421 4422 self._retreat(index2) 4423 4424 if not this: 4425 if 
self._match_text_seq("UNSIGNED"): 4426 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4427 if not unsigned_type_token: 4428 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4429 4430 type_token = unsigned_type_token or type_token 4431 4432 this = exp.DataType( 4433 this=exp.DataType.Type[type_token.value], 4434 expressions=expressions, 4435 nested=nested, 4436 values=values, 4437 prefix=prefix, 4438 ) 4439 elif expressions: 4440 this.set("expressions", expressions) 4441 4442 # https://materialize.com/docs/sql/types/list/#type-name 4443 while self._match(TokenType.LIST): 4444 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4445 4446 index = self._index 4447 4448 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4449 matched_array = self._match(TokenType.ARRAY) 4450 4451 while self._curr: 4452 matched_l_bracket = self._match(TokenType.L_BRACKET) 4453 if not matched_l_bracket and not matched_array: 4454 break 4455 4456 matched_array = False 4457 values = self._parse_csv(self._parse_assignment) or None 4458 if values and not schema: 4459 self._retreat(index) 4460 break 4461 4462 this = exp.DataType( 4463 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4464 ) 4465 self._match(TokenType.R_BRACKET) 4466 4467 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4468 converter = self.TYPE_CONVERTER.get(this.this) 4469 if converter: 4470 this = converter(t.cast(exp.DataType, this)) 4471 4472 return this 4473 4474 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4475 index = self._index 4476 this = ( 4477 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4478 or self._parse_id_var() 4479 ) 4480 self._match(TokenType.COLON) 4481 4482 if ( 4483 type_required 4484 and not isinstance(this, exp.DataType) 4485 and not self._match_set(self.TYPE_TOKENS, advance=False) 4486 ): 4487 self._retreat(index) 4488 return self._parse_types() 4489 4490 return self._parse_column_def(this) 4491 4492 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4493 if not self._match_text_seq("AT", "TIME", "ZONE"): 4494 return this 4495 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4496 4497 def _parse_column(self) -> t.Optional[exp.Expression]: 4498 this = self._parse_column_reference() 4499 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4500 4501 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4502 this = self._parse_field() 4503 if ( 4504 not this 4505 and self._match(TokenType.VALUES, advance=False) 4506 and self.VALUES_FOLLOWED_BY_PAREN 4507 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4508 ): 4509 this = self._parse_id_var() 4510 4511 if isinstance(this, exp.Identifier): 4512 # We bubble up comments from the Identifier to the Column 4513 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4514 4515 return this 4516 4517 def _parse_colon_as_json_extract( 4518 self, this: t.Optional[exp.Expression] 4519 ) -> t.Optional[exp.Expression]: 4520 casts = [] 4521 json_path = [] 4522 4523 while self._match(TokenType.COLON): 4524 start_index = self._index 4525 path = self._parse_column_ops(self._parse_field(any_token=True)) 4526 4527 # The cast :: operator has a lower precedence than the extraction operator :, so 4528 # we rearrange the AST appropriately to avoid casting the 
JSON path 4529 while isinstance(path, exp.Cast): 4530 casts.append(path.to) 4531 path = path.this 4532 4533 if casts: 4534 dcolon_offset = next( 4535 i 4536 for i, t in enumerate(self._tokens[start_index:]) 4537 if t.token_type == TokenType.DCOLON 4538 ) 4539 end_token = self._tokens[start_index + dcolon_offset - 1] 4540 else: 4541 end_token = self._prev 4542 4543 if path: 4544 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4545 4546 if json_path: 4547 this = self.expression( 4548 exp.JSONExtract, 4549 this=this, 4550 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4551 ) 4552 4553 while casts: 4554 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4555 4556 return this 4557 4558 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4559 this = self._parse_bracket(this) 4560 4561 while self._match_set(self.COLUMN_OPERATORS): 4562 op_token = self._prev.token_type 4563 op = self.COLUMN_OPERATORS.get(op_token) 4564 4565 if op_token == TokenType.DCOLON: 4566 field = self._parse_types() 4567 if not field: 4568 self.raise_error("Expected type") 4569 elif op and self._curr: 4570 field = self._parse_column_reference() 4571 else: 4572 field = self._parse_field(any_token=True, anonymous_func=True) 4573 4574 if isinstance(field, exp.Func) and this: 4575 # bigquery allows function calls like x.y.count(...) 4576 # SAFE.SUBSTR(...) 4577 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4578 this = exp.replace_tree( 4579 this, 4580 lambda n: ( 4581 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4582 if n.table 4583 else n.this 4584 ) 4585 if isinstance(n, exp.Column) 4586 else n, 4587 ) 4588 4589 if op: 4590 this = op(self, this, field) 4591 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4592 this = self.expression( 4593 exp.Column, 4594 this=field, 4595 table=this.this, 4596 db=this.args.get("table"), 4597 catalog=this.args.get("db"), 4598 ) 4599 else: 4600 this = self.expression(exp.Dot, this=this, expression=field) 4601 4602 this = self._parse_bracket(this) 4603 4604 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4605 4606 def _parse_primary(self) -> t.Optional[exp.Expression]: 4607 if self._match_set(self.PRIMARY_PARSERS): 4608 token_type = self._prev.token_type 4609 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4610 4611 if token_type == TokenType.STRING: 4612 expressions = [primary] 4613 while self._match(TokenType.STRING): 4614 expressions.append(exp.Literal.string(self._prev.text)) 4615 4616 if len(expressions) > 1: 4617 return self.expression(exp.Concat, expressions=expressions) 4618 4619 return primary 4620 4621 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4622 return exp.Literal.number(f"0.{self._prev.text}") 4623 4624 if self._match(TokenType.L_PAREN): 4625 comments = self._prev_comments 4626 query = self._parse_select() 4627 4628 if query: 4629 expressions = [query] 4630 else: 4631 expressions = self._parse_expressions() 4632 4633 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4634 4635 if not this and self._match(TokenType.R_PAREN, advance=False): 4636 this = self.expression(exp.Tuple) 4637 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4638 this = self._parse_subquery(this=this, parse_alias=False) 4639 elif isinstance(this, exp.Subquery): 4640 this = self._parse_subquery( 4641 this=self._parse_set_operations(this), 
parse_alias=False 4642 ) 4643 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4644 this = self.expression(exp.Tuple, expressions=expressions) 4645 else: 4646 this = self.expression(exp.Paren, this=this) 4647 4648 if this: 4649 this.add_comments(comments) 4650 4651 self._match_r_paren(expression=this) 4652 return this 4653 4654 return None 4655 4656 def _parse_field( 4657 self, 4658 any_token: bool = False, 4659 tokens: t.Optional[t.Collection[TokenType]] = None, 4660 anonymous_func: bool = False, 4661 ) -> t.Optional[exp.Expression]: 4662 if anonymous_func: 4663 field = ( 4664 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4665 or self._parse_primary() 4666 ) 4667 else: 4668 field = self._parse_primary() or self._parse_function( 4669 anonymous=anonymous_func, any_token=any_token 4670 ) 4671 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4672 4673 def _parse_function( 4674 self, 4675 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4676 anonymous: bool = False, 4677 optional_parens: bool = True, 4678 any_token: bool = False, 4679 ) -> t.Optional[exp.Expression]: 4680 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4681 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4682 fn_syntax = False 4683 if ( 4684 self._match(TokenType.L_BRACE, advance=False) 4685 and self._next 4686 and self._next.text.upper() == "FN" 4687 ): 4688 self._advance(2) 4689 fn_syntax = True 4690 4691 func = self._parse_function_call( 4692 functions=functions, 4693 anonymous=anonymous, 4694 optional_parens=optional_parens, 4695 any_token=any_token, 4696 ) 4697 4698 if fn_syntax: 4699 self._match(TokenType.R_BRACE) 4700 4701 return func 4702 4703 def _parse_function_call( 4704 self, 4705 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4706 anonymous: bool = False, 4707 optional_parens: bool = True, 4708 any_token: bool = False, 4709 ) -> t.Optional[exp.Expression]: 4710 if not self._curr: 4711 return None 4712 4713 comments = self._curr.comments 4714 token_type = self._curr.token_type 4715 this = self._curr.text 4716 upper = this.upper() 4717 4718 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4719 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4720 self._advance() 4721 return self._parse_window(parser(self)) 4722 4723 if not self._next or self._next.token_type != TokenType.L_PAREN: 4724 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4725 self._advance() 4726 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4727 4728 return None 4729 4730 if any_token: 4731 if token_type in self.RESERVED_TOKENS: 4732 return None 4733 elif token_type not in self.FUNC_TOKENS: 4734 return None 4735 4736 self._advance(2) 4737 4738 parser = self.FUNCTION_PARSERS.get(upper) 4739 if parser and not anonymous: 4740 this = parser(self) 4741 else: 4742 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4743 4744 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4745 this = self.expression(subquery_predicate, this=self._parse_select()) 4746 self._match_r_paren() 4747 return this 4748 4749 if functions is None: 4750 functions = self.FUNCTIONS 4751 4752 function = functions.get(upper) 4753 4754 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4755 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4756 4757 if alias: 4758 args = self._kv_to_prop_eq(args) 4759 4760 if function and not 
anonymous: 4761 if "dialect" in function.__code__.co_varnames: 4762 func = function(args, dialect=self.dialect) 4763 else: 4764 func = function(args) 4765 4766 func = self.validate_expression(func, args) 4767 if not self.dialect.NORMALIZE_FUNCTIONS: 4768 func.meta["name"] = this 4769 4770 this = func 4771 else: 4772 if token_type == TokenType.IDENTIFIER: 4773 this = exp.Identifier(this=this, quoted=True) 4774 this = self.expression(exp.Anonymous, this=this, expressions=args) 4775 4776 if isinstance(this, exp.Expression): 4777 this.add_comments(comments) 4778 4779 self._match_r_paren(this) 4780 return self._parse_window(this) 4781 4782 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4783 transformed = [] 4784 4785 for e in expressions: 4786 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4787 if isinstance(e, exp.Alias): 4788 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4789 4790 if not isinstance(e, exp.PropertyEQ): 4791 e = self.expression( 4792 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4793 ) 4794 4795 if isinstance(e.this, exp.Column): 4796 e.this.replace(e.this.this) 4797 4798 transformed.append(e) 4799 4800 return transformed 4801 4802 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4803 return self._parse_column_def(self._parse_id_var()) 4804 4805 def _parse_user_defined_function( 4806 self, kind: t.Optional[TokenType] = None 4807 ) -> t.Optional[exp.Expression]: 4808 this = self._parse_id_var() 4809 4810 while self._match(TokenType.DOT): 4811 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4812 4813 if not self._match(TokenType.L_PAREN): 4814 return this 4815 4816 expressions = self._parse_csv(self._parse_function_parameter) 4817 self._match_r_paren() 4818 return self.expression( 4819 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4820 ) 4821 4822 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4823 literal = self._parse_primary() 4824 if literal: 4825 return self.expression(exp.Introducer, this=token.text, expression=literal) 4826 4827 return self.expression(exp.Identifier, this=token.text) 4828 4829 def _parse_session_parameter(self) -> exp.SessionParameter: 4830 kind = None 4831 this = self._parse_id_var() or self._parse_primary() 4832 4833 if this and self._match(TokenType.DOT): 4834 kind = this.name 4835 this = self._parse_var() or self._parse_primary() 4836 4837 return self.expression(exp.SessionParameter, this=this, kind=kind) 4838 4839 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4840 return self._parse_id_var() 4841 4842 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4843 index = self._index 4844 4845 if self._match(TokenType.L_PAREN): 4846 expressions = t.cast( 4847 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4848 ) 4849 4850 if not self._match(TokenType.R_PAREN): 4851 self._retreat(index) 4852 else: 4853 expressions = [self._parse_lambda_arg()] 4854 4855 if self._match_set(self.LAMBDAS): 4856 return self.LAMBDAS[self._prev.token_type](self, expressions) 4857 4858 self._retreat(index) 4859 4860 this: t.Optional[exp.Expression] 4861 4862 if self._match(TokenType.DISTINCT): 4863 this = self.expression( 4864 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4865 ) 4866 else: 4867 this = self._parse_select_or_expression(alias=alias) 4868 4869 return self._parse_limit( 4870 
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4871 ) 4872 4873 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4874 index = self._index 4875 if not self._match(TokenType.L_PAREN): 4876 return this 4877 4878 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4879 # expr can be of both types 4880 if self._match_set(self.SELECT_START_TOKENS): 4881 self._retreat(index) 4882 return this 4883 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4884 self._match_r_paren() 4885 return self.expression(exp.Schema, this=this, expressions=args) 4886 4887 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4888 return self._parse_column_def(self._parse_field(any_token=True)) 4889 4890 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4891 # column defs are not really columns, they're identifiers 4892 if isinstance(this, exp.Column): 4893 this = this.this 4894 4895 kind = self._parse_types(schema=True) 4896 4897 if self._match_text_seq("FOR", "ORDINALITY"): 4898 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4899 4900 constraints: t.List[exp.Expression] = [] 4901 4902 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4903 ("ALIAS", "MATERIALIZED") 4904 ): 4905 persisted = self._prev.text.upper() == "MATERIALIZED" 4906 constraints.append( 4907 self.expression( 4908 exp.ComputedColumnConstraint, 4909 this=self._parse_assignment(), 4910 persisted=persisted or self._match_text_seq("PERSISTED"), 4911 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4912 ) 4913 ) 4914 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4915 self._match(TokenType.ALIAS) 4916 constraints.append( 4917 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4918 ) 4919 4920 while True: 4921 constraint = self._parse_column_constraint() 4922 if not constraint: 4923 break 4924 constraints.append(constraint) 4925 4926 if not kind and not constraints: 4927 return this 4928 4929 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4930 4931 def _parse_auto_increment( 4932 self, 4933 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4934 start = None 4935 increment = None 4936 4937 if self._match(TokenType.L_PAREN, advance=False): 4938 args = self._parse_wrapped_csv(self._parse_bitwise) 4939 start = seq_get(args, 0) 4940 increment = seq_get(args, 1) 4941 elif self._match_text_seq("START"): 4942 start = self._parse_bitwise() 4943 self._match_text_seq("INCREMENT") 4944 increment = self._parse_bitwise() 4945 4946 if start and increment: 4947 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4948 4949 return exp.AutoIncrementColumnConstraint() 4950 4951 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4952 if not self._match_text_seq("REFRESH"): 4953 self._retreat(self._index - 1) 4954 return None 4955 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4956 4957 def _parse_compress(self) -> exp.CompressColumnConstraint: 4958 if self._match(TokenType.L_PAREN, advance=False): 4959 return self.expression( 4960 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4961 ) 4962 4963 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4964 4965 def 
_parse_generated_as_identity( 4966 self, 4967 ) -> ( 4968 exp.GeneratedAsIdentityColumnConstraint 4969 | exp.ComputedColumnConstraint 4970 | exp.GeneratedAsRowColumnConstraint 4971 ): 4972 if self._match_text_seq("BY", "DEFAULT"): 4973 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4974 this = self.expression( 4975 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4976 ) 4977 else: 4978 self._match_text_seq("ALWAYS") 4979 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4980 4981 self._match(TokenType.ALIAS) 4982 4983 if self._match_text_seq("ROW"): 4984 start = self._match_text_seq("START") 4985 if not start: 4986 self._match(TokenType.END) 4987 hidden = self._match_text_seq("HIDDEN") 4988 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4989 4990 identity = self._match_text_seq("IDENTITY") 4991 4992 if self._match(TokenType.L_PAREN): 4993 if self._match(TokenType.START_WITH): 4994 this.set("start", self._parse_bitwise()) 4995 if self._match_text_seq("INCREMENT", "BY"): 4996 this.set("increment", self._parse_bitwise()) 4997 if self._match_text_seq("MINVALUE"): 4998 this.set("minvalue", self._parse_bitwise()) 4999 if self._match_text_seq("MAXVALUE"): 5000 this.set("maxvalue", self._parse_bitwise()) 5001 5002 if self._match_text_seq("CYCLE"): 5003 this.set("cycle", True) 5004 elif self._match_text_seq("NO", "CYCLE"): 5005 this.set("cycle", False) 5006 5007 if not identity: 5008 this.set("expression", self._parse_range()) 5009 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5010 args = self._parse_csv(self._parse_bitwise) 5011 this.set("start", seq_get(args, 0)) 5012 this.set("increment", seq_get(args, 1)) 5013 5014 self._match_r_paren() 5015 5016 return this 5017 5018 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5019 self._match_text_seq("LENGTH") 5020 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5021 5022 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5023 if self._match_text_seq("NULL"): 5024 return self.expression(exp.NotNullColumnConstraint) 5025 if self._match_text_seq("CASESPECIFIC"): 5026 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5027 if self._match_text_seq("FOR", "REPLICATION"): 5028 return self.expression(exp.NotForReplicationColumnConstraint) 5029 return None 5030 5031 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5032 if self._match(TokenType.CONSTRAINT): 5033 this = self._parse_id_var() 5034 else: 5035 this = None 5036 5037 if self._match_texts(self.CONSTRAINT_PARSERS): 5038 return self.expression( 5039 exp.ColumnConstraint, 5040 this=this, 5041 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5042 ) 5043 5044 return this 5045 5046 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5047 if not self._match(TokenType.CONSTRAINT): 5048 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5049 5050 return self.expression( 5051 exp.Constraint, 5052 this=self._parse_id_var(), 5053 expressions=self._parse_unnamed_constraints(), 5054 ) 5055 5056 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5057 constraints = [] 5058 while True: 5059 constraint = self._parse_unnamed_constraint() or self._parse_function() 5060 if not constraint: 5061 break 5062 constraints.append(constraint) 5063 5064 return constraints 5065 5066 def _parse_unnamed_constraint( 5067 self, constraints: 
t.Optional[t.Collection[str]] = None 5068 ) -> t.Optional[exp.Expression]: 5069 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5070 constraints or self.CONSTRAINT_PARSERS 5071 ): 5072 return None 5073 5074 constraint = self._prev.text.upper() 5075 if constraint not in self.CONSTRAINT_PARSERS: 5076 self.raise_error(f"No parser found for schema constraint {constraint}.") 5077 5078 return self.CONSTRAINT_PARSERS[constraint](self) 5079 5080 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5081 self._match_text_seq("KEY") 5082 return self.expression( 5083 exp.UniqueColumnConstraint, 5084 this=self._parse_schema(self._parse_id_var(any_token=False)), 5085 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5086 on_conflict=self._parse_on_conflict(), 5087 ) 5088 5089 def _parse_key_constraint_options(self) -> t.List[str]: 5090 options = [] 5091 while True: 5092 if not self._curr: 5093 break 5094 5095 if self._match(TokenType.ON): 5096 action = None 5097 on = self._advance_any() and self._prev.text 5098 5099 if self._match_text_seq("NO", "ACTION"): 5100 action = "NO ACTION" 5101 elif self._match_text_seq("CASCADE"): 5102 action = "CASCADE" 5103 elif self._match_text_seq("RESTRICT"): 5104 action = "RESTRICT" 5105 elif self._match_pair(TokenType.SET, TokenType.NULL): 5106 action = "SET NULL" 5107 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5108 action = "SET DEFAULT" 5109 else: 5110 self.raise_error("Invalid key constraint") 5111 5112 options.append(f"ON {on} {action}") 5113 elif self._match_text_seq("NOT", "ENFORCED"): 5114 options.append("NOT ENFORCED") 5115 elif self._match_text_seq("DEFERRABLE"): 5116 options.append("DEFERRABLE") 5117 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5118 options.append("INITIALLY DEFERRED") 5119 elif self._match_text_seq("NORELY"): 5120 options.append("NORELY") 5121 elif self._match_text_seq("MATCH", "FULL"): 5122 options.append("MATCH FULL") 5123 else: 5124 break 5125 5126 return options 5127 5128 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5129 if match and not self._match(TokenType.REFERENCES): 5130 return None 5131 5132 expressions = None 5133 this = self._parse_table(schema=True) 5134 options = self._parse_key_constraint_options() 5135 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5136 5137 def _parse_foreign_key(self) -> exp.ForeignKey: 5138 expressions = self._parse_wrapped_id_vars() 5139 reference = self._parse_references() 5140 options = {} 5141 5142 while self._match(TokenType.ON): 5143 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5144 self.raise_error("Expected DELETE or UPDATE") 5145 5146 kind = self._prev.text.lower() 5147 5148 if self._match_text_seq("NO", "ACTION"): 5149 action = "NO ACTION" 5150 elif self._match(TokenType.SET): 5151 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5152 action = "SET " + self._prev.text.upper() 5153 else: 5154 self._advance() 5155 action = self._prev.text.upper() 5156 5157 options[kind] = action 5158 5159 return self.expression( 5160 exp.ForeignKey, 5161 expressions=expressions, 5162 reference=reference, 5163 **options, # type: ignore 5164 ) 5165 5166 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5167 return self._parse_field() 5168 5169 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5170 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5171 self._retreat(self._index - 
1) 5172 return None 5173 5174 id_vars = self._parse_wrapped_id_vars() 5175 return self.expression( 5176 exp.PeriodForSystemTimeConstraint, 5177 this=seq_get(id_vars, 0), 5178 expression=seq_get(id_vars, 1), 5179 ) 5180 5181 def _parse_primary_key( 5182 self, wrapped_optional: bool = False, in_props: bool = False 5183 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5184 desc = ( 5185 self._match_set((TokenType.ASC, TokenType.DESC)) 5186 and self._prev.token_type == TokenType.DESC 5187 ) 5188 5189 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5190 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5191 5192 expressions = self._parse_wrapped_csv( 5193 self._parse_primary_key_part, optional=wrapped_optional 5194 ) 5195 options = self._parse_key_constraint_options() 5196 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5197 5198 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5199 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5200 5201 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5202 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5203 return this 5204 5205 bracket_kind = self._prev.token_type 5206 expressions = self._parse_csv( 5207 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5208 ) 5209 5210 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5211 self.raise_error("Expected ]") 5212 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5213 self.raise_error("Expected }") 5214 5215 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5216 if bracket_kind == TokenType.L_BRACE: 5217 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5218 elif not this: 5219 this = self.expression(exp.Array, expressions=expressions) 5220 else: 5221 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5222 if constructor_type: 5223 return self.expression(constructor_type, expressions=expressions) 5224 5225 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5226 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5227 5228 self._add_comments(this) 5229 return self._parse_bracket(this) 5230 5231 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5232 if self._match(TokenType.COLON): 5233 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5234 return this 5235 5236 def _parse_case(self) -> t.Optional[exp.Expression]: 5237 ifs = [] 5238 default = None 5239 5240 comments = self._prev_comments 5241 expression = self._parse_assignment() 5242 5243 while self._match(TokenType.WHEN): 5244 this = self._parse_assignment() 5245 self._match(TokenType.THEN) 5246 then = self._parse_assignment() 5247 ifs.append(self.expression(exp.If, this=this, true=then)) 5248 5249 if self._match(TokenType.ELSE): 5250 default = self._parse_assignment() 5251 5252 if not self._match(TokenType.END): 5253 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5254 default = exp.column("interval") 5255 else: 5256 self.raise_error("Expected END after CASE", self._prev) 5257 5258 return self.expression( 5259 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5260 ) 5261 5262 def _parse_if(self) -> t.Optional[exp.Expression]: 
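# Illustrative example: a minimal sketch of the CASE shape produced by
# _parse_case above and the IF(...) form handled by _parse_if below. It
# assumes only the public sqlglot.transpile API; the exact transpiled string
# is approximate and the column name is a placeholder.
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT IF(a > 0, 'pos', 'neg')", read="mysql", write="postgres")
#     ["SELECT CASE WHEN a > 0 THEN 'pos' ELSE 'neg' END"]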

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
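
    # Illustrative usage (not part of the original source): because STRING_AGG is
    # normalized into exp.GroupConcat, it can be transpiled to dialects that spell
    # the same aggregate as GROUP_CONCAT. The expected output below is an assumption:
    #
    #   import sqlglot
    #   sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")
    #   # -> ["SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t"]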

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
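
    # Illustrative usage (not part of the original source): MySQL full-text search
    # syntax is captured by exp.MatchAgainst, including the trailing modifier:
    #
    #   import sqlglot
    #   q = "SELECT * FROM t WHERE MATCH(a, b) AGAINST('text' IN BOOLEAN MODE)"
    #   node = sqlglot.parse_one(q, read="mysql").find(sqlglot.exp.MatchAgainst)
    #   assert node.args["modifier"] == "IN BOOLEAN MODE"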

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
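
    # Illustrative usage (not part of the original source): the leading trim type
    # and the FROM form are captured on the exp.Trim node, with the operands
    # swapped into canonical order:
    #
    #   import sqlglot
    #   trim = sqlglot.parse_one("TRIM(LEADING 'x' FROM y)")
    #   assert isinstance(trim, sqlglot.exp.Trim)
    #   assert trim.args["position"] == "LEADING"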

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER clause.
        # Some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
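
    # Illustrative usage (not part of the original source): the trailing
    # "IGNORE NULLS" form is normalized so the window's `this` ends up wrapped in
    # exp.IgnoreNulls, matching the in-function form:
    #
    #   import sqlglot
    #   q = "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t"
    #   w = sqlglot.parse_one(q).find(sqlglot.exp.Window)
    #   assert isinstance(w.this, sqlglot.exp.IgnoreNulls)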

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)
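
    # Illustrative usage (not part of the original source): transaction control
    # statements are handled by the two methods above:
    #
    #   import sqlglot
    #   assert isinstance(sqlglot.parse_one("BEGIN"), sqlglot.exp.Transaction)
    #   commit = sqlglot.parse_one("COMMIT AND CHAIN")
    #   assert isinstance(commit, sqlglot.exp.Commit) and commit.args["chain"] is True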

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
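
    # Illustrative usage (not part of the original source): ALTER TABLE actions are
    # collected under the AlterTable node's "actions"; renaming a column yields an
    # exp.RenameColumn action:
    #
    #   import sqlglot
    #   alter = sqlglot.parse_one("ALTER TABLE t RENAME COLUMN a TO b")
    #   assert isinstance(alter.args["actions"][0], sqlglot.exp.RenameColumn)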

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
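
    # Illustrative usage (not part of the original source): each WHEN branch of a
    # MERGE becomes an exp.When entry under the Merge node's "expressions":
    #
    #   import sqlglot
    #   q = "MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.v = s.v"
    #   merge = sqlglot.parse_one(q)
    #   assert isinstance(merge, sqlglot.exp.Merge)
    #   assert isinstance(merge.expressions[0], sqlglot.exp.When)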

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
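
    # Illustrative usage (not part of the original source): a simple assignment is
    # parsed into exp.Set wrapping a SetItem whose `this` is an equality:
    #
    #   import sqlglot
    #   set_stmt = sqlglot.parse_one("SET x = 1")
    #   item = set_stmt.expressions[0]
    #   assert isinstance(item, sqlglot.exp.SetItem)
    #   assert isinstance(item.this, sqlglot.exp.EQ)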

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )
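
    # Illustrative usage (not part of the original source): TRUNCATE accepts a
    # list of tables, each collected under "expressions":
    #
    #   import sqlglot
    #   trunc = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY", read="postgres")
    #   assert isinstance(trunc, sqlglot.exp.TruncateTable)
    #   assert len(trunc.expressions) == 2 and trunc.args["identity"] == "RESTART"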

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
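
    # Illustrative usage (not part of the original source): a Postgres-style COPY
    # statement is parsed into exp.Copy; `kind` is True for COPY ... FROM and
    # False for COPY ... TO:
    #
    #   import sqlglot
    #   copy = sqlglot.parse_one("COPY t FROM 'data.csv' WITH (FORMAT CSV)", read="postgres")
    #   assert isinstance(copy, sqlglot.exp.Copy) and copy.args["kind"] is True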
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "MOD": build_mod, 162 "TIME_TO_TIME_STR": lambda args: exp.Cast( 163 this=seq_get(args, 0), 164 to=exp.DataType(this=exp.DataType.Type.TEXT), 165 ), 166 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 167 this=exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 start=exp.Literal.number(1), 172 length=exp.Literal.number(10), 173 ), 174 "VAR_MAP": build_var_map, 175 "LOWER": build_lower, 176 "UPPER": build_upper, 177 "HEX": build_hex, 178 "TO_HEX": build_hex, 179 } 180 181 NO_PAREN_FUNCTIONS = { 182 TokenType.CURRENT_DATE: exp.CurrentDate, 183 TokenType.CURRENT_DATETIME: exp.CurrentDate, 184 TokenType.CURRENT_TIME: exp.CurrentTime, 185 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 186 TokenType.CURRENT_USER: exp.CurrentUser, 187 } 188 189 STRUCT_TYPE_TOKENS = { 190 TokenType.NESTED, 191 TokenType.OBJECT, 192 TokenType.STRUCT, 193 } 194 195 NESTED_TYPE_TOKENS = { 196 TokenType.ARRAY, 197 TokenType.LIST, 198 TokenType.LOWCARDINALITY, 199 TokenType.MAP, 200 TokenType.NULLABLE, 201 *STRUCT_TYPE_TOKENS, 202 } 203 204 ENUM_TYPE_TOKENS = { 205 TokenType.ENUM, 206 TokenType.ENUM8, 207 TokenType.ENUM16, 208 } 209 210 AGGREGATE_TYPE_TOKENS = { 211 TokenType.AGGREGATEFUNCTION, 212 TokenType.SIMPLEAGGREGATEFUNCTION, 213 } 214 215 TYPE_TOKENS = { 216 TokenType.BIT, 217 TokenType.BOOLEAN, 218 TokenType.TINYINT, 219 TokenType.UTINYINT, 220 TokenType.SMALLINT, 221 TokenType.USMALLINT, 222 TokenType.INT, 223 TokenType.UINT, 224 TokenType.BIGINT, 225 TokenType.UBIGINT, 226 TokenType.INT128, 227 TokenType.UINT128, 228 TokenType.INT256, 229 TokenType.UINT256, 230 TokenType.MEDIUMINT, 231 TokenType.UMEDIUMINT, 232 TokenType.FIXEDSTRING, 233 TokenType.FLOAT, 234 TokenType.DOUBLE, 235 TokenType.CHAR, 236 TokenType.NCHAR, 237 TokenType.VARCHAR, 238 TokenType.NVARCHAR, 239 TokenType.BPCHAR, 240 

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
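
    # The CONJUNCTION/DISJUNCTION/EQUALITY/COMPARISON/BITWISE/TERM/FACTOR/EXPONENT
    # tables above map operator tokens to expression types, one table per binding
    # level; the expression parser descends through these levels, so tighter
    # operators end up deeper in the tree. A minimal doctest-style sketch via the
    # public parse_one helper (reprs abbreviated):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT 1 + 2 * 3").selects[0]
    #   Add(this=Literal(this=1, is_string=False), expression=Mul(...))
    #
    # i.e. '*' (FACTOR) binds tighter than '+' (TERM).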

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
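
    # COLUMN_OPERATORS drives what may follow a parsed column. A small sketch,
    # using Postgres purely as an example of a dialect with '::' casts and '->'
    # JSON extraction (reprs abbreviated):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT c::INT", read="postgres").selects[0]
    #   Cast(this=Column(...), to=DataType(this=Type.INT))
    #   >>> type(sqlglot.parse_one("SELECT j -> 'k'", read="postgres").selects[0]).__name__
    #   'JSONExtract'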

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
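
    # _parse_statement (defined further below) dispatches on the first token via
    # STATEMENT_PARSERS and only falls back to expression/SELECT parsing when no
    # statement keyword matches. A quick sketch of the resulting root nodes:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("DROP TABLE t")).__name__
    #   'Drop'
    #   >>> type(sqlglot.parse_one("1 + 1")).__name__  # no statement keyword
    #   'Add'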

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
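
    # RANGE_PARSERS handles postfix predicates such as BETWEEN, IN and IS once the
    # left-hand operand has been parsed. For instance:
    #
    #   >>> import sqlglot
    #   >>> where = sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 2").args["where"]
    #   >>> type(where.this).__name__
    #   'Between'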

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
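
    # PROPERTY_PARSERS is keyed by the uppercased property keyword encountered in
    # DDL. A minimal sketch with MySQL's ENGINE property (repr abbreviated):
    #
    #   >>> import sqlglot
    #   >>> ddl = sqlglot.parse_one("CREATE TABLE t (c INT) ENGINE=InnoDB", read="mysql")
    #   >>> ddl.args["properties"].expressions
    #   [EngineProperty(this=Var(this=InnoDB))]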

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
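
    # CONSTRAINT_PARSERS is consulted while parsing column definitions and named
    # constraints. A small sketch of the constraint kinds it produces:
    #
    #   >>> import sqlglot
    #   >>> col = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL DEFAULT 0)").this.expressions[0]
    #   >>> [type(c.kind).__name__ for c in col.args["constraints"]]
    #   ['NotNullColumnConstraint', 'DefaultColumnConstraint']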
lambda self: ("limit", self._parse_limit()), 1037 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1038 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1039 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1040 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1041 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1042 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1043 TokenType.CLUSTER_BY: lambda self: ( 1044 "cluster", 1045 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1046 ), 1047 TokenType.DISTRIBUTE_BY: lambda self: ( 1048 "distribute", 1049 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1050 ), 1051 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1052 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1053 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1054 } 1055 1056 SET_PARSERS = { 1057 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1058 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1059 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1060 "TRANSACTION": lambda self: self._parse_set_transaction(), 1061 } 1062 1063 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1064 1065 TYPE_LITERAL_PARSERS = { 1066 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1067 } 1068 1069 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1070 1071 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1072 1073 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1074 1075 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1076 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1077 "ISOLATION": ( 1078 ("LEVEL", "REPEATABLE", "READ"), 1079 ("LEVEL", "READ", "COMMITTED"), 1080 ("LEVEL", "READ", "UNCOMITTED"), 1081 ("LEVEL", "SERIALIZABLE"), 1082 ), 1083 "READ": ("WRITE", "ONLY"), 1084 } 1085 1086 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1087 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1088 ) 1089 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1090 1091 CREATE_SEQUENCE: OPTIONS_TYPE = { 1092 "SCALE": ("EXTEND", "NOEXTEND"), 1093 "SHARD": ("EXTEND", "NOEXTEND"), 1094 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1095 **dict.fromkeys( 1096 ( 1097 "SESSION", 1098 "GLOBAL", 1099 "KEEP", 1100 "NOKEEP", 1101 "ORDER", 1102 "NOORDER", 1103 "NOCACHE", 1104 "CYCLE", 1105 "NOCYCLE", 1106 "NOMINVALUE", 1107 "NOMAXVALUE", 1108 "NOSCALE", 1109 "NOSHARD", 1110 ), 1111 tuple(), 1112 ), 1113 } 1114 1115 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1116 1117 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1118 1119 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1120 1121 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1122 1123 CLONE_KEYWORDS = {"CLONE", "COPY"} 1124 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1125 1126 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1127 1128 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1129 1130 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1131 1132 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
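
    # A minimal end-to-end sketch of driving Parser.parse directly; parse_one is
    # the usual entry point, and "duckdb" is just an example dialect name:
    #
    #   >>> from sqlglot.dialects import Dialect
    #   >>> dialect = Dialect.get_or_raise("duckdb")
    #   >>> parser = dialect.parser()
    #   >>> sql = "SELECT 1; SELECT 2"
    #   >>> [e.sql() for e in parser.parse(dialect.tokenize(sql), sql)]
    #   ['SELECT 1', 'SELECT 2']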
1285 """ 1286 errors = [] 1287 for expression_type in ensure_list(expression_types): 1288 parser = self.EXPRESSION_PARSERS.get(expression_type) 1289 if not parser: 1290 raise TypeError(f"No parser registered for {expression_type}") 1291 1292 try: 1293 return self._parse(parser, raw_tokens, sql) 1294 except ParseError as e: 1295 e.errors[0]["into_expression"] = expression_type 1296 errors.append(e) 1297 1298 raise ParseError( 1299 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1300 errors=merge_errors(errors), 1301 ) from errors[-1] 1302 1303 def _parse( 1304 self, 1305 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1306 raw_tokens: t.List[Token], 1307 sql: t.Optional[str] = None, 1308 ) -> t.List[t.Optional[exp.Expression]]: 1309 self.reset() 1310 self.sql = sql or "" 1311 1312 total = len(raw_tokens) 1313 chunks: t.List[t.List[Token]] = [[]] 1314 1315 for i, token in enumerate(raw_tokens): 1316 if token.token_type == TokenType.SEMICOLON: 1317 if token.comments: 1318 chunks.append([token]) 1319 1320 if i < total - 1: 1321 chunks.append([]) 1322 else: 1323 chunks[-1].append(token) 1324 1325 expressions = [] 1326 1327 for tokens in chunks: 1328 self._index = -1 1329 self._tokens = tokens 1330 self._advance() 1331 1332 expressions.append(parse_method(self)) 1333 1334 if self._index < len(self._tokens): 1335 self.raise_error("Invalid expression / Unexpected token") 1336 1337 self.check_errors() 1338 1339 return expressions 1340 1341 def check_errors(self) -> None: 1342 """Logs or raises any found errors, depending on the chosen error level setting.""" 1343 if self.error_level == ErrorLevel.WARN: 1344 for error in self.errors: 1345 logger.error(str(error)) 1346 elif self.error_level == ErrorLevel.RAISE and self.errors: 1347 raise ParseError( 1348 concat_messages(self.errors, self.max_errors), 1349 errors=merge_errors(self.errors), 1350 ) 1351 1352 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1353 """ 1354 Appends an error in the list of recorded errors or raises it, depending on the chosen 1355 error level setting. 1356 """ 1357 token = token or self._curr or self._prev or Token.string("") 1358 start = token.start 1359 end = token.end + 1 1360 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1361 highlight = self.sql[start:end] 1362 end_context = self.sql[end : end + self.error_message_context] 1363 1364 error = ParseError.new( 1365 f"{message}. Line {token.line}, Col: {token.col}.\n" 1366 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1367 description=message, 1368 line=token.line, 1369 col=token.col, 1370 start_context=start_context, 1371 highlight=highlight, 1372 end_context=end_context, 1373 ) 1374 1375 if self.error_level == ErrorLevel.IMMEDIATE: 1376 raise error 1377 1378 self.errors.append(error) 1379 1380 def expression( 1381 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1382 ) -> E: 1383 """ 1384 Creates a new, validated Expression. 1385 1386 Args: 1387 exp_class: The expression class to instantiate. 1388 comments: An optional list of comments to attach to the expression. 1389 kwargs: The arguments to set for the expression along with their respective values. 1390 1391 Returns: 1392 The target expression. 
1393 """ 1394 instance = exp_class(**kwargs) 1395 instance.add_comments(comments) if comments else self._add_comments(instance) 1396 return self.validate_expression(instance) 1397 1398 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1399 if expression and self._prev_comments: 1400 expression.add_comments(self._prev_comments) 1401 self._prev_comments = None 1402 1403 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1404 """ 1405 Validates an Expression, making sure that all its mandatory arguments are set. 1406 1407 Args: 1408 expression: The expression to validate. 1409 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1410 1411 Returns: 1412 The validated expression. 1413 """ 1414 if self.error_level != ErrorLevel.IGNORE: 1415 for error_message in expression.error_messages(args): 1416 self.raise_error(error_message) 1417 1418 return expression 1419 1420 def _find_sql(self, start: Token, end: Token) -> str: 1421 return self.sql[start.start : end.end + 1] 1422 1423 def _is_connected(self) -> bool: 1424 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1425 1426 def _advance(self, times: int = 1) -> None: 1427 self._index += times 1428 self._curr = seq_get(self._tokens, self._index) 1429 self._next = seq_get(self._tokens, self._index + 1) 1430 1431 if self._index > 0: 1432 self._prev = self._tokens[self._index - 1] 1433 self._prev_comments = self._prev.comments 1434 else: 1435 self._prev = None 1436 self._prev_comments = None 1437 1438 def _retreat(self, index: int) -> None: 1439 if index != self._index: 1440 self._advance(index - self._index) 1441 1442 def _warn_unsupported(self) -> None: 1443 if len(self._tokens) <= 1: 1444 return 1445 1446 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1447 # interested in emitting a warning for the one being currently processed. 1448 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1449 1450 logger.warning( 1451 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1452 ) 1453 1454 def _parse_command(self) -> exp.Command: 1455 self._warn_unsupported() 1456 return self.expression( 1457 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1458 ) 1459 1460 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1461 """ 1462 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
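
    # _parse_comment (above) covers COMMENT ON statements; Postgres is used here
    # only as an example dialect:
    #
    #   >>> import sqlglot
    #   >>> c = sqlglot.parse_one("COMMENT ON TABLE db.t IS 'fact table'", read="postgres")
    #   >>> c.args["kind"], c.expression.this
    #   ('TABLE', 'fact table')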

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
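
    # _parse_drop fills exp.Drop incrementally from the optional trailing
    # keywords it matches. A sketch of the resulting args:
    #
    #   >>> import sqlglot
    #   >>> drop = sqlglot.parse_one("DROP TABLE IF EXISTS s.t CASCADE")
    #   >>> drop.args["kind"], drop.args["exists"], drop.args["cascade"]
    #   ('TABLE', True, True)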

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property:
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
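
    # _parse_create threads the properties collected at the various locations
    # (POST_CREATE, POST_NAME, POST_SCHEMA, ...) into a single exp.Create node:
    #
    #   >>> import sqlglot
    #   >>> create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #   >>> create.args["kind"], create.args["replace"], type(create.expression).__name__
    #   ('VIEW', True, 'Select')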
self._match_text_seq("START"): 1784 self._match_text_seq("=") 1785 seq.set("start", self._parse_term()) 1786 elif self._match_text_seq("CACHE"): 1787 # T-SQL allows empty CACHE which is initialized dynamically 1788 seq.set("cache", self._parse_number() or True) 1789 elif self._match_text_seq("OWNED", "BY"): 1790 # "OWNED BY NONE" is the default 1791 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1792 else: 1793 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1794 if opt: 1795 options.append(opt) 1796 else: 1797 break 1798 1799 seq.set("options", options if options else None) 1800 return None if self._index == index else seq 1801 1802 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1803 # only used for teradata currently 1804 self._match(TokenType.COMMA) 1805 1806 kwargs = { 1807 "no": self._match_text_seq("NO"), 1808 "dual": self._match_text_seq("DUAL"), 1809 "before": self._match_text_seq("BEFORE"), 1810 "default": self._match_text_seq("DEFAULT"), 1811 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1812 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1813 "after": self._match_text_seq("AFTER"), 1814 "minimum": self._match_texts(("MIN", "MINIMUM")), 1815 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1816 } 1817 1818 if self._match_texts(self.PROPERTY_PARSERS): 1819 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1820 try: 1821 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1822 except TypeError: 1823 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1824 1825 return None 1826 1827 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1828 return self._parse_wrapped_csv(self._parse_property) 1829 1830 def _parse_property(self) -> t.Optional[exp.Expression]: 1831 if self._match_texts(self.PROPERTY_PARSERS): 1832 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1833 1834 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1835 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1836 1837 if self._match_text_seq("COMPOUND", "SORTKEY"): 1838 return self._parse_sortkey(compound=True) 1839 1840 if self._match_text_seq("SQL", "SECURITY"): 1841 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1842 1843 index = self._index 1844 key = self._parse_column() 1845 1846 if not self._match(TokenType.EQ): 1847 self._retreat(index) 1848 return self._parse_sequence_properties() 1849 1850 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1851 if isinstance(key, exp.Column): 1852 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1853 1854 value = self._parse_bitwise() or self._parse_var(any_token=True) 1855 1856 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1857 if isinstance(value, exp.Column): 1858 value = exp.var(value.name) 1859 1860 return self.expression(exp.Property, this=key, value=value) 1861 1862 def _parse_stored(self) -> exp.FileFormatProperty: 1863 self._match(TokenType.ALIAS) 1864 1865 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1866 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1867 1868 return self.expression( 1869 exp.FileFormatProperty, 1870 this=( 1871 self.expression( 1872 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1873 ) 1874 if 
input_format or output_format 1875 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1876 ), 1877 ) 1878 1879 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1880 field = self._parse_field() 1881 if isinstance(field, exp.Identifier) and not field.quoted: 1882 field = exp.var(field) 1883 1884 return field 1885 1886 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1887 self._match(TokenType.EQ) 1888 self._match(TokenType.ALIAS) 1889 1890 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1891 1892 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1893 properties = [] 1894 while True: 1895 if before: 1896 prop = self._parse_property_before() 1897 else: 1898 prop = self._parse_property() 1899 if not prop: 1900 break 1901 for p in ensure_list(prop): 1902 properties.append(p) 1903 1904 if properties: 1905 return self.expression(exp.Properties, expressions=properties) 1906 1907 return None 1908 1909 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1910 return self.expression( 1911 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1912 ) 1913 1914 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1915 if self._index >= 2: 1916 pre_volatile_token = self._tokens[self._index - 2] 1917 else: 1918 pre_volatile_token = None 1919 1920 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1921 return exp.VolatileProperty() 1922 1923 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1924 1925 def _parse_retention_period(self) -> exp.Var: 1926 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1927 number = self._parse_number() 1928 number_str = f"{number} " if number else "" 1929 unit = self._parse_var(any_token=True) 1930 return exp.var(f"{number_str}{unit}") 1931 1932 def _parse_system_versioning_property( 1933 self, with_: bool = False 1934 ) -> exp.WithSystemVersioningProperty: 1935 self._match(TokenType.EQ) 1936 prop = self.expression( 1937 exp.WithSystemVersioningProperty, 1938 **{ # type: ignore 1939 "on": True, 1940 "with": with_, 1941 }, 1942 ) 1943 1944 if self._match_text_seq("OFF"): 1945 prop.set("on", False) 1946 return prop 1947 1948 self._match(TokenType.ON) 1949 if self._match(TokenType.L_PAREN): 1950 while self._curr and not self._match(TokenType.R_PAREN): 1951 if self._match_text_seq("HISTORY_TABLE", "="): 1952 prop.set("this", self._parse_table_parts()) 1953 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1954 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1955 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1956 prop.set("retention_period", self._parse_retention_period()) 1957 1958 self._match(TokenType.COMMA) 1959 1960 return prop 1961 1962 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1963 self._match(TokenType.EQ) 1964 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1965 prop = self.expression(exp.DataDeletionProperty, on=on) 1966 1967 if self._match(TokenType.L_PAREN): 1968 while self._curr and not self._match(TokenType.R_PAREN): 1969 if self._match_text_seq("FILTER_COLUMN", "="): 1970 prop.set("filter_column", self._parse_column()) 1971 elif self._match_text_seq("RETENTION_PERIOD", "="): 1972 prop.set("retention_period", self._parse_retention_period()) 1973 1974 
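# A minimal usage sketch (added for illustration; it assumes only the public
# `sqlglot.parse_one` entry point, nothing beyond this module): the
# temporal-table options handled by _parse_system_versioning_property and
# _parse_retention_period above surface when parsing T-SQL DDL such as:
#
#     import sqlglot
#     ddl = (
#         "CREATE TABLE t (id INT) WITH (SYSTEM_VERSIONING = ON "
#         "(HISTORY_TABLE = dbo.t_history, HISTORY_RETENTION_PERIOD = 3 MONTHS))"
#     )
#     sqlglot.parse_one(ddl, read="tsql")
#
# The result should carry an exp.WithSystemVersioningProperty in its
# property list.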
self._match(TokenType.COMMA) 1975 1976 return prop 1977 1978 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1979 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1980 prop = self._parse_system_versioning_property(with_=True) 1981 self._match_r_paren() 1982 return prop 1983 1984 if self._match(TokenType.L_PAREN, advance=False): 1985 return self._parse_wrapped_properties() 1986 1987 if self._match_text_seq("JOURNAL"): 1988 return self._parse_withjournaltable() 1989 1990 if self._match_texts(self.VIEW_ATTRIBUTES): 1991 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1992 1993 if self._match_text_seq("DATA"): 1994 return self._parse_withdata(no=False) 1995 elif self._match_text_seq("NO", "DATA"): 1996 return self._parse_withdata(no=True) 1997 1998 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1999 return self._parse_serde_properties(with_=True) 2000 2001 if not self._next: 2002 return None 2003 2004 return self._parse_withisolatedloading() 2005 2006 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2007 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2008 self._match(TokenType.EQ) 2009 2010 user = self._parse_id_var() 2011 self._match(TokenType.PARAMETER) 2012 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2013 2014 if not user or not host: 2015 return None 2016 2017 return exp.DefinerProperty(this=f"{user}@{host}") 2018 2019 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2020 self._match(TokenType.TABLE) 2021 self._match(TokenType.EQ) 2022 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2023 2024 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2025 return self.expression(exp.LogProperty, no=no) 2026 2027 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2028 return self.expression(exp.JournalProperty, **kwargs) 2029 2030 def _parse_checksum(self) -> exp.ChecksumProperty: 2031 self._match(TokenType.EQ) 2032 2033 on = None 2034 if self._match(TokenType.ON): 2035 on = True 2036 elif self._match_text_seq("OFF"): 2037 on = False 2038 2039 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2040 2041 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2042 return self.expression( 2043 exp.Cluster, 2044 expressions=( 2045 self._parse_wrapped_csv(self._parse_ordered) 2046 if wrapped 2047 else self._parse_csv(self._parse_ordered) 2048 ), 2049 ) 2050 2051 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2052 self._match_text_seq("BY") 2053 2054 self._match_l_paren() 2055 expressions = self._parse_csv(self._parse_column) 2056 self._match_r_paren() 2057 2058 if self._match_text_seq("SORTED", "BY"): 2059 self._match_l_paren() 2060 sorted_by = self._parse_csv(self._parse_ordered) 2061 self._match_r_paren() 2062 else: 2063 sorted_by = None 2064 2065 self._match(TokenType.INTO) 2066 buckets = self._parse_number() 2067 self._match_text_seq("BUCKETS") 2068 2069 return self.expression( 2070 exp.ClusteredByProperty, 2071 expressions=expressions, 2072 sorted_by=sorted_by, 2073 buckets=buckets, 2074 ) 2075 2076 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2077 if not self._match_text_seq("GRANTS"): 2078 self._retreat(self._index - 1) 2079 return None 2080 2081 return self.expression(exp.CopyGrantsProperty) 2082 2083 def _parse_freespace(self) -> exp.FreespaceProperty: 2084 self._match(TokenType.EQ) 2085 return 
self.expression( 2086 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2087 ) 2088 2089 def _parse_mergeblockratio( 2090 self, no: bool = False, default: bool = False 2091 ) -> exp.MergeBlockRatioProperty: 2092 if self._match(TokenType.EQ): 2093 return self.expression( 2094 exp.MergeBlockRatioProperty, 2095 this=self._parse_number(), 2096 percent=self._match(TokenType.PERCENT), 2097 ) 2098 2099 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2100 2101 def _parse_datablocksize( 2102 self, 2103 default: t.Optional[bool] = None, 2104 minimum: t.Optional[bool] = None, 2105 maximum: t.Optional[bool] = None, 2106 ) -> exp.DataBlocksizeProperty: 2107 self._match(TokenType.EQ) 2108 size = self._parse_number() 2109 2110 units = None 2111 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2112 units = self._prev.text 2113 2114 return self.expression( 2115 exp.DataBlocksizeProperty, 2116 size=size, 2117 units=units, 2118 default=default, 2119 minimum=minimum, 2120 maximum=maximum, 2121 ) 2122 2123 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2124 self._match(TokenType.EQ) 2125 always = self._match_text_seq("ALWAYS") 2126 manual = self._match_text_seq("MANUAL") 2127 never = self._match_text_seq("NEVER") 2128 default = self._match_text_seq("DEFAULT") 2129 2130 autotemp = None 2131 if self._match_text_seq("AUTOTEMP"): 2132 autotemp = self._parse_schema() 2133 2134 return self.expression( 2135 exp.BlockCompressionProperty, 2136 always=always, 2137 manual=manual, 2138 never=never, 2139 default=default, 2140 autotemp=autotemp, 2141 ) 2142 2143 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2144 index = self._index 2145 no = self._match_text_seq("NO") 2146 concurrent = self._match_text_seq("CONCURRENT") 2147 2148 if not self._match_text_seq("ISOLATED", "LOADING"): 2149 self._retreat(index) 2150 return None 2151 2152 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2153 return self.expression( 2154 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2155 ) 2156 2157 def _parse_locking(self) -> exp.LockingProperty: 2158 if self._match(TokenType.TABLE): 2159 kind = "TABLE" 2160 elif self._match(TokenType.VIEW): 2161 kind = "VIEW" 2162 elif self._match(TokenType.ROW): 2163 kind = "ROW" 2164 elif self._match_text_seq("DATABASE"): 2165 kind = "DATABASE" 2166 else: 2167 kind = None 2168 2169 if kind in ("DATABASE", "TABLE", "VIEW"): 2170 this = self._parse_table_parts() 2171 else: 2172 this = None 2173 2174 if self._match(TokenType.FOR): 2175 for_or_in = "FOR" 2176 elif self._match(TokenType.IN): 2177 for_or_in = "IN" 2178 else: 2179 for_or_in = None 2180 2181 if self._match_text_seq("ACCESS"): 2182 lock_type = "ACCESS" 2183 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2184 lock_type = "EXCLUSIVE" 2185 elif self._match_text_seq("SHARE"): 2186 lock_type = "SHARE" 2187 elif self._match_text_seq("READ"): 2188 lock_type = "READ" 2189 elif self._match_text_seq("WRITE"): 2190 lock_type = "WRITE" 2191 elif self._match_text_seq("CHECKSUM"): 2192 lock_type = "CHECKSUM" 2193 else: 2194 lock_type = None 2195 2196 override = self._match_text_seq("OVERRIDE") 2197 2198 return self.expression( 2199 exp.LockingProperty, 2200 this=this, 2201 kind=kind, 2202 for_or_in=for_or_in, 2203 lock_type=lock_type, 2204 override=override, 2205 ) 2206 2207 def _parse_partition_by(self) -> t.List[exp.Expression]: 2208 if 
self._match(TokenType.PARTITION_BY): 2209 return self._parse_csv(self._parse_assignment) 2210 return [] 2211 2212 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2213 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2214 if self._match_text_seq("MINVALUE"): 2215 return exp.var("MINVALUE") 2216 if self._match_text_seq("MAXVALUE"): 2217 return exp.var("MAXVALUE") 2218 return self._parse_bitwise() 2219 2220 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2221 expression = None 2222 from_expressions = None 2223 to_expressions = None 2224 2225 if self._match(TokenType.IN): 2226 this = self._parse_wrapped_csv(self._parse_bitwise) 2227 elif self._match(TokenType.FROM): 2228 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2229 self._match_text_seq("TO") 2230 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2231 elif self._match_text_seq("WITH", "(", "MODULUS"): 2232 this = self._parse_number() 2233 self._match_text_seq(",", "REMAINDER") 2234 expression = self._parse_number() 2235 self._match_r_paren() 2236 else: 2237 self.raise_error("Failed to parse partition bound spec.") 2238 2239 return self.expression( 2240 exp.PartitionBoundSpec, 2241 this=this, 2242 expression=expression, 2243 from_expressions=from_expressions, 2244 to_expressions=to_expressions, 2245 ) 2246 2247 # https://www.postgresql.org/docs/current/sql-createtable.html 2248 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2249 if not self._match_text_seq("OF"): 2250 self._retreat(self._index - 1) 2251 return None 2252 2253 this = self._parse_table(schema=True) 2254 2255 if self._match(TokenType.DEFAULT): 2256 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2257 elif self._match_text_seq("FOR", "VALUES"): 2258 expression = self._parse_partition_bound_spec() 2259 else: 2260 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2261 2262 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2263 2264 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2265 self._match(TokenType.EQ) 2266 return self.expression( 2267 exp.PartitionedByProperty, 2268 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2269 ) 2270 2271 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2272 if self._match_text_seq("AND", "STATISTICS"): 2273 statistics = True 2274 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2275 statistics = False 2276 else: 2277 statistics = None 2278 2279 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2280 2281 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2282 if self._match_text_seq("SQL"): 2283 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2284 return None 2285 2286 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2287 if self._match_text_seq("SQL", "DATA"): 2288 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2289 return None 2290 2291 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2292 if self._match_text_seq("PRIMARY", "INDEX"): 2293 return exp.NoPrimaryIndexProperty() 2294 if self._match_text_seq("SQL"): 2295 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2296 return None 2297 2298 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2299 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2300 return exp.OnCommitProperty() 2301 
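# A usage sketch (illustrative, assuming the public `sqlglot.parse_one`
# entry point): the Postgres declarative-partitioning clauses parsed by
# _parse_partitioned_of and _parse_partition_bound_spec above match DDL like:
#
#     import sqlglot
#     ddl = "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)"
#     sqlglot.parse_one(ddl, read="postgres")
#
# which is expected to yield an exp.PartitionedOfProperty wrapping an
# exp.PartitionBoundSpec with "from_expressions"/"to_expressions" lists.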
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2302 return exp.OnCommitProperty(delete=True) 2303 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2304 2305 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2306 if self._match_text_seq("SQL", "DATA"): 2307 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2308 return None 2309 2310 def _parse_distkey(self) -> exp.DistKeyProperty: 2311 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2312 2313 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2314 table = self._parse_table(schema=True) 2315 2316 options = [] 2317 while self._match_texts(("INCLUDING", "EXCLUDING")): 2318 this = self._prev.text.upper() 2319 2320 id_var = self._parse_id_var() 2321 if not id_var: 2322 return None 2323 2324 options.append( 2325 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2326 ) 2327 2328 return self.expression(exp.LikeProperty, this=table, expressions=options) 2329 2330 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2331 return self.expression( 2332 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2333 ) 2334 2335 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2336 self._match(TokenType.EQ) 2337 return self.expression( 2338 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2339 ) 2340 2341 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2342 self._match_text_seq("WITH", "CONNECTION") 2343 return self.expression( 2344 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2345 ) 2346 2347 def _parse_returns(self) -> exp.ReturnsProperty: 2348 value: t.Optional[exp.Expression] 2349 null = None 2350 is_table = self._match(TokenType.TABLE) 2351 2352 if is_table: 2353 if self._match(TokenType.LT): 2354 value = self.expression( 2355 exp.Schema, 2356 this="TABLE", 2357 expressions=self._parse_csv(self._parse_struct_types), 2358 ) 2359 if not self._match(TokenType.GT): 2360 self.raise_error("Expecting >") 2361 else: 2362 value = self._parse_schema(exp.var("TABLE")) 2363 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2364 null = True 2365 value = None 2366 else: 2367 value = self._parse_types() 2368 2369 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2370 2371 def _parse_describe(self) -> exp.Describe: 2372 kind = self._match_set(self.CREATABLES) and self._prev.text 2373 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2374 if self._match(TokenType.DOT): 2375 style = None 2376 self._retreat(self._index - 2) 2377 this = self._parse_table(schema=True) 2378 properties = self._parse_properties() 2379 expressions = properties.expressions if properties else None 2380 return self.expression( 2381 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2382 ) 2383 2384 def _parse_insert(self) -> exp.Insert: 2385 comments = ensure_list(self._prev_comments) 2386 hint = self._parse_hint() 2387 overwrite = self._match(TokenType.OVERWRITE) 2388 ignore = self._match(TokenType.IGNORE) 2389 local = self._match_text_seq("LOCAL") 2390 alternative = None 2391 is_function = None 2392 2393 if self._match_text_seq("DIRECTORY"): 2394 this: t.Optional[exp.Expression] = self.expression( 2395 exp.Directory, 2396 this=self._parse_var_or_string(), 2397 
local=local, 2398 row_format=self._parse_row_format(match_row=True), 2399 ) 2400 else: 2401 if self._match(TokenType.OR): 2402 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2403 2404 self._match(TokenType.INTO) 2405 comments += ensure_list(self._prev_comments) 2406 self._match(TokenType.TABLE) 2407 is_function = self._match(TokenType.FUNCTION) 2408 2409 this = ( 2410 self._parse_table(schema=True, parse_partition=True) 2411 if not is_function 2412 else self._parse_function() 2413 ) 2414 2415 returning = self._parse_returning() 2416 2417 return self.expression( 2418 exp.Insert, 2419 comments=comments, 2420 hint=hint, 2421 is_function=is_function, 2422 this=this, 2423 stored=self._match_text_seq("STORED") and self._parse_stored(), 2424 by_name=self._match_text_seq("BY", "NAME"), 2425 exists=self._parse_exists(), 2426 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2427 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2428 conflict=self._parse_on_conflict(), 2429 returning=returning or self._parse_returning(), 2430 overwrite=overwrite, 2431 alternative=alternative, 2432 ignore=ignore, 2433 ) 2434 2435 def _parse_kill(self) -> exp.Kill: 2436 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2437 2438 return self.expression( 2439 exp.Kill, 2440 this=self._parse_primary(), 2441 kind=kind, 2442 ) 2443 2444 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2445 conflict = self._match_text_seq("ON", "CONFLICT") 2446 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2447 2448 if not conflict and not duplicate: 2449 return None 2450 2451 conflict_keys = None 2452 constraint = None 2453 2454 if conflict: 2455 if self._match_text_seq("ON", "CONSTRAINT"): 2456 constraint = self._parse_id_var() 2457 elif self._match(TokenType.L_PAREN): 2458 conflict_keys = self._parse_csv(self._parse_id_var) 2459 self._match_r_paren() 2460 2461 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2462 if self._prev.token_type == TokenType.UPDATE: 2463 self._match(TokenType.SET) 2464 expressions = self._parse_csv(self._parse_equality) 2465 else: 2466 expressions = None 2467 2468 return self.expression( 2469 exp.OnConflict, 2470 duplicate=duplicate, 2471 expressions=expressions, 2472 action=action, 2473 conflict_keys=conflict_keys, 2474 constraint=constraint, 2475 ) 2476 2477 def _parse_returning(self) -> t.Optional[exp.Returning]: 2478 if not self._match(TokenType.RETURNING): 2479 return None 2480 return self.expression( 2481 exp.Returning, 2482 expressions=self._parse_csv(self._parse_expression), 2483 into=self._match(TokenType.INTO) and self._parse_table_part(), 2484 ) 2485 2486 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2487 if not self._match(TokenType.FORMAT): 2488 return None 2489 return self._parse_row_format() 2490 2491 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2492 index = self._index 2493 with_ = with_ or self._match_text_seq("WITH") 2494 2495 if not self._match(TokenType.SERDE_PROPERTIES): 2496 self._retreat(index) 2497 return None 2498 return self.expression( 2499 exp.SerdeProperties, 2500 **{ # type: ignore 2501 "expressions": self._parse_wrapped_properties(), 2502 "with": with_, 2503 }, 2504 ) 2505 2506 def _parse_row_format( 2507 self, match_row: bool = False 2508 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2509 
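# Illustrative sketch (not from the original source; it assumes the public
# `sqlglot.parse_one` entry point): this method covers Hive's ROW FORMAT
# clause, both the SERDE and the DELIMITED variants, e.g.:
#
#     import sqlglot
#     ddl = (
#         "CREATE TABLE t (a STRING) ROW FORMAT DELIMITED "
#         "FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'"
#     )
#     sqlglot.parse_one(ddl, read="hive")
#
# The DELIMITED branch below collects each TERMINATED BY string into the
# corresponding RowFormatDelimitedProperty keyword argument.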
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2510 return None 2511 2512 if self._match_text_seq("SERDE"): 2513 this = self._parse_string() 2514 2515 serde_properties = self._parse_serde_properties() 2516 2517 return self.expression( 2518 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2519 ) 2520 2521 self._match_text_seq("DELIMITED") 2522 2523 kwargs = {} 2524 2525 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2526 kwargs["fields"] = self._parse_string() 2527 if self._match_text_seq("ESCAPED", "BY"): 2528 kwargs["escaped"] = self._parse_string() 2529 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2530 kwargs["collection_items"] = self._parse_string() 2531 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2532 kwargs["map_keys"] = self._parse_string() 2533 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2534 kwargs["lines"] = self._parse_string() 2535 if self._match_text_seq("NULL", "DEFINED", "AS"): 2536 kwargs["null"] = self._parse_string() 2537 2538 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2539 2540 def _parse_load(self) -> exp.LoadData | exp.Command: 2541 if self._match_text_seq("DATA"): 2542 local = self._match_text_seq("LOCAL") 2543 self._match_text_seq("INPATH") 2544 inpath = self._parse_string() 2545 overwrite = self._match(TokenType.OVERWRITE) 2546 self._match_pair(TokenType.INTO, TokenType.TABLE) 2547 2548 return self.expression( 2549 exp.LoadData, 2550 this=self._parse_table(schema=True), 2551 local=local, 2552 overwrite=overwrite, 2553 inpath=inpath, 2554 partition=self._parse_partition(), 2555 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2556 serde=self._match_text_seq("SERDE") and self._parse_string(), 2557 ) 2558 return self._parse_as_command(self._prev) 2559 2560 def _parse_delete(self) -> exp.Delete: 2561 # This handles MySQL's "Multiple-Table Syntax" 2562 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2563 tables = None 2564 comments = self._prev_comments 2565 if not self._match(TokenType.FROM, advance=False): 2566 tables = self._parse_csv(self._parse_table) or None 2567 2568 returning = self._parse_returning() 2569 2570 return self.expression( 2571 exp.Delete, 2572 comments=comments, 2573 tables=tables, 2574 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2575 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2576 where=self._parse_where(), 2577 returning=returning or self._parse_returning(), 2578 limit=self._parse_limit(), 2579 ) 2580 2581 def _parse_update(self) -> exp.Update: 2582 comments = self._prev_comments 2583 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2584 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2585 returning = self._parse_returning() 2586 return self.expression( 2587 exp.Update, 2588 comments=comments, 2589 **{ # type: ignore 2590 "this": this, 2591 "expressions": expressions, 2592 "from": self._parse_from(joins=True), 2593 "where": self._parse_where(), 2594 "returning": returning or self._parse_returning(), 2595 "order": self._parse_order(), 2596 "limit": self._parse_limit(), 2597 }, 2598 ) 2599 2600 def _parse_uncache(self) -> exp.Uncache: 2601 if not self._match(TokenType.TABLE): 2602 self.raise_error("Expecting TABLE after UNCACHE") 2603 2604 return self.expression( 2605 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2606 ) 2607 2608 def 
_parse_cache(self) -> exp.Cache: 2609 lazy = self._match_text_seq("LAZY") 2610 self._match(TokenType.TABLE) 2611 table = self._parse_table(schema=True) 2612 2613 options = [] 2614 if self._match_text_seq("OPTIONS"): 2615 self._match_l_paren() 2616 k = self._parse_string() 2617 self._match(TokenType.EQ) 2618 v = self._parse_string() 2619 options = [k, v] 2620 self._match_r_paren() 2621 2622 self._match(TokenType.ALIAS) 2623 return self.expression( 2624 exp.Cache, 2625 this=table, 2626 lazy=lazy, 2627 options=options, 2628 expression=self._parse_select(nested=True), 2629 ) 2630 2631 def _parse_partition(self) -> t.Optional[exp.Partition]: 2632 if not self._match(TokenType.PARTITION): 2633 return None 2634 2635 return self.expression( 2636 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2637 ) 2638 2639 def _parse_value(self) -> t.Optional[exp.Tuple]: 2640 if self._match(TokenType.L_PAREN): 2641 expressions = self._parse_csv(self._parse_expression) 2642 self._match_r_paren() 2643 return self.expression(exp.Tuple, expressions=expressions) 2644 2645 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2646 expression = self._parse_expression() 2647 if expression: 2648 return self.expression(exp.Tuple, expressions=[expression]) 2649 return None 2650 2651 def _parse_projections(self) -> t.List[exp.Expression]: 2652 return self._parse_expressions() 2653 2654 def _parse_select( 2655 self, 2656 nested: bool = False, 2657 table: bool = False, 2658 parse_subquery_alias: bool = True, 2659 parse_set_operation: bool = True, 2660 ) -> t.Optional[exp.Expression]: 2661 cte = self._parse_with() 2662 2663 if cte: 2664 this = self._parse_statement() 2665 2666 if not this: 2667 self.raise_error("Failed to parse any statement following CTE") 2668 return cte 2669 2670 if "with" in this.arg_types: 2671 this.set("with", cte) 2672 else: 2673 self.raise_error(f"{this.key} does not support CTE") 2674 this = cte 2675 2676 return this 2677 2678 # duckdb supports leading with FROM x 2679 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2680 2681 if self._match(TokenType.SELECT): 2682 comments = self._prev_comments 2683 2684 hint = self._parse_hint() 2685 all_ = self._match(TokenType.ALL) 2686 distinct = self._match_set(self.DISTINCT_TOKENS) 2687 2688 kind = ( 2689 self._match(TokenType.ALIAS) 2690 and self._match_texts(("STRUCT", "VALUE")) 2691 and self._prev.text.upper() 2692 ) 2693 2694 if distinct: 2695 distinct = self.expression( 2696 exp.Distinct, 2697 on=self._parse_value() if self._match(TokenType.ON) else None, 2698 ) 2699 2700 if all_ and distinct: 2701 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2702 2703 limit = self._parse_limit(top=True) 2704 projections = self._parse_projections() 2705 2706 this = self.expression( 2707 exp.Select, 2708 kind=kind, 2709 hint=hint, 2710 distinct=distinct, 2711 expressions=projections, 2712 limit=limit, 2713 ) 2714 this.comments = comments 2715 2716 into = self._parse_into() 2717 if into: 2718 this.set("into", into) 2719 2720 if not from_: 2721 from_ = self._parse_from() 2722 2723 if from_: 2724 this.set("from", from_) 2725 2726 this = self._parse_query_modifiers(this) 2727 elif (table or nested) and self._match(TokenType.L_PAREN): 2728 if self._match(TokenType.PIVOT): 2729 this = self._parse_simplified_pivot() 2730 elif self._match(TokenType.FROM): 2731 this = exp.select("*").from_( 2732 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2733 ) 2734 else: 2735 
this = ( 2736 self._parse_table() 2737 if table 2738 else self._parse_select(nested=True, parse_set_operation=False) 2739 ) 2740 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2741 2742 self._match_r_paren() 2743 2744 # We return early here so that the UNION isn't attached to the subquery by the 2745 # following call to _parse_set_operations, but instead becomes the parent node 2746 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2747 elif self._match(TokenType.VALUES, advance=False): 2748 this = self._parse_derived_table_values() 2749 elif from_: 2750 this = exp.select("*").from_(from_.this, copy=False) 2751 else: 2752 this = None 2753 2754 if parse_set_operation: 2755 return self._parse_set_operations(this) 2756 return this 2757 2758 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2759 if not skip_with_token and not self._match(TokenType.WITH): 2760 return None 2761 2762 comments = self._prev_comments 2763 recursive = self._match(TokenType.RECURSIVE) 2764 2765 expressions = [] 2766 while True: 2767 expressions.append(self._parse_cte()) 2768 2769 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2770 break 2771 else: 2772 self._match(TokenType.WITH) 2773 2774 return self.expression( 2775 exp.With, comments=comments, expressions=expressions, recursive=recursive 2776 ) 2777 2778 def _parse_cte(self) -> exp.CTE: 2779 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2780 if not alias or not alias.this: 2781 self.raise_error("Expected CTE to have alias") 2782 2783 self._match(TokenType.ALIAS) 2784 2785 if self._match_text_seq("NOT", "MATERIALIZED"): 2786 materialized = False 2787 elif self._match_text_seq("MATERIALIZED"): 2788 materialized = True 2789 else: 2790 materialized = None 2791 2792 return self.expression( 2793 exp.CTE, 2794 this=self._parse_wrapped(self._parse_statement), 2795 alias=alias, 2796 materialized=materialized, 2797 ) 2798 2799 def _parse_table_alias( 2800 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2801 ) -> t.Optional[exp.TableAlias]: 2802 any_token = self._match(TokenType.ALIAS) 2803 alias = ( 2804 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2805 or self._parse_string_as_identifier() 2806 ) 2807 2808 index = self._index 2809 if self._match(TokenType.L_PAREN): 2810 columns = self._parse_csv(self._parse_function_parameter) 2811 self._match_r_paren() if columns else self._retreat(index) 2812 else: 2813 columns = None 2814 2815 if not alias and not columns: 2816 return None 2817 2818 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2819 2820 # We bubble up comments from the Identifier to the TableAlias 2821 if isinstance(alias, exp.Identifier): 2822 table_alias.add_comments(alias.pop_comments()) 2823 2824 return table_alias 2825 2826 def _parse_subquery( 2827 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2828 ) -> t.Optional[exp.Subquery]: 2829 if not this: 2830 return None 2831 2832 return self.expression( 2833 exp.Subquery, 2834 this=this, 2835 pivots=self._parse_pivots(), 2836 alias=self._parse_table_alias() if parse_alias else None, 2837 ) 2838 2839 def _implicit_unnests_to_explicit(self, this: E) -> E: 2840 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2841 2842 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2843 for i, join in enumerate(this.args.get("joins") or []): 2844 table = join.this 2845 
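# The rewrite below is easier to see with an example (an illustrative
# sketch, assuming a dialect whose parser sets SUPPORTS_IMPLICIT_UNNEST,
# such as BigQuery): a comma "join" against a column of a previously-seen
# table is converted into an explicit UNNEST, e.g.:
#
#     import sqlglot
#     sqlglot.parse_one("SELECT id FROM t, t.tags", read="bigquery")
#
# Here t.tags refers to a column of t rather than a real table, so the loop
# below replaces that join operand with an exp.Unnest over that column.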
normalized_table = table.copy() 2846 normalized_table.meta["maybe_column"] = True 2847 normalized_table = _norm(normalized_table, dialect=self.dialect) 2848 2849 if isinstance(table, exp.Table) and not join.args.get("on"): 2850 if normalized_table.parts[0].name in refs: 2851 table_as_column = table.to_column() 2852 unnest = exp.Unnest(expressions=[table_as_column]) 2853 2854 # Table.to_column creates a parent Alias node that we want to convert to 2855 # a TableAlias and attach to the Unnest, so it matches the parser's output 2856 if isinstance(table.args.get("alias"), exp.TableAlias): 2857 table_as_column.replace(table_as_column.this) 2858 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2859 2860 table.replace(unnest) 2861 2862 refs.add(normalized_table.alias_or_name) 2863 2864 return this 2865 2866 def _parse_query_modifiers( 2867 self, this: t.Optional[exp.Expression] 2868 ) -> t.Optional[exp.Expression]: 2869 if isinstance(this, (exp.Query, exp.Table)): 2870 for join in self._parse_joins(): 2871 this.append("joins", join) 2872 for lateral in iter(self._parse_lateral, None): 2873 this.append("laterals", lateral) 2874 2875 while True: 2876 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2877 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2878 key, expression = parser(self) 2879 2880 if expression: 2881 this.set(key, expression) 2882 if key == "limit": 2883 offset = expression.args.pop("offset", None) 2884 2885 if offset: 2886 offset = exp.Offset(expression=offset) 2887 this.set("offset", offset) 2888 2889 limit_by_expressions = expression.expressions 2890 expression.set("expressions", None) 2891 offset.set("expressions", limit_by_expressions) 2892 continue 2893 break 2894 2895 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2896 this = self._implicit_unnests_to_explicit(this) 2897 2898 return this 2899 2900 def _parse_hint(self) -> t.Optional[exp.Hint]: 2901 if self._match(TokenType.HINT): 2902 hints = [] 2903 for hint in iter( 2904 lambda: self._parse_csv( 2905 lambda: self._parse_function() or self._parse_var(upper=True) 2906 ), 2907 [], 2908 ): 2909 hints.extend(hint) 2910 2911 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2912 self.raise_error("Expected */ after HINT") 2913 2914 return self.expression(exp.Hint, expressions=hints) 2915 2916 return None 2917 2918 def _parse_into(self) -> t.Optional[exp.Into]: 2919 if not self._match(TokenType.INTO): 2920 return None 2921 2922 temp = self._match(TokenType.TEMPORARY) 2923 unlogged = self._match_text_seq("UNLOGGED") 2924 self._match(TokenType.TABLE) 2925 2926 return self.expression( 2927 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2928 ) 2929 2930 def _parse_from( 2931 self, joins: bool = False, skip_from_token: bool = False 2932 ) -> t.Optional[exp.From]: 2933 if not skip_from_token and not self._match(TokenType.FROM): 2934 return None 2935 2936 return self.expression( 2937 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2938 ) 2939 2940 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2941 return self.expression( 2942 exp.MatchRecognizeMeasure, 2943 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2944 this=self._parse_expression(), 2945 ) 2946 2947 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2948 if not self._match(TokenType.MATCH_RECOGNIZE): 2949 return None 2950 2951 self._match_l_paren() 2952 2953 partition = 
self._parse_partition_by() 2954 order = self._parse_order() 2955 2956 measures = ( 2957 self._parse_csv(self._parse_match_recognize_measure) 2958 if self._match_text_seq("MEASURES") 2959 else None 2960 ) 2961 2962 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2963 rows = exp.var("ONE ROW PER MATCH") 2964 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2965 text = "ALL ROWS PER MATCH" 2966 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2967 text += " SHOW EMPTY MATCHES" 2968 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2969 text += " OMIT EMPTY MATCHES" 2970 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2971 text += " WITH UNMATCHED ROWS" 2972 rows = exp.var(text) 2973 else: 2974 rows = None 2975 2976 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2977 text = "AFTER MATCH SKIP" 2978 if self._match_text_seq("PAST", "LAST", "ROW"): 2979 text += " PAST LAST ROW" 2980 elif self._match_text_seq("TO", "NEXT", "ROW"): 2981 text += " TO NEXT ROW" 2982 elif self._match_text_seq("TO", "FIRST"): 2983 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2984 elif self._match_text_seq("TO", "LAST"): 2985 text += f" TO LAST {self._advance_any().text}" # type: ignore 2986 after = exp.var(text) 2987 else: 2988 after = None 2989 2990 if self._match_text_seq("PATTERN"): 2991 self._match_l_paren() 2992 2993 if not self._curr: 2994 self.raise_error("Expecting )", self._curr) 2995 2996 paren = 1 2997 start = self._curr 2998 2999 while self._curr and paren > 0: 3000 if self._curr.token_type == TokenType.L_PAREN: 3001 paren += 1 3002 if self._curr.token_type == TokenType.R_PAREN: 3003 paren -= 1 3004 3005 end = self._prev 3006 self._advance() 3007 3008 if paren > 0: 3009 self.raise_error("Expecting )", self._curr) 3010 3011 pattern = exp.var(self._find_sql(start, end)) 3012 else: 3013 pattern = None 3014 3015 define = ( 3016 self._parse_csv(self._parse_name_as_expression) 3017 if self._match_text_seq("DEFINE") 3018 else None 3019 ) 3020 3021 self._match_r_paren() 3022 3023 return self.expression( 3024 exp.MatchRecognize, 3025 partition_by=partition, 3026 order=order, 3027 measures=measures, 3028 rows=rows, 3029 after=after, 3030 pattern=pattern, 3031 define=define, 3032 alias=self._parse_table_alias(), 3033 ) 3034 3035 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3036 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3037 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3038 cross_apply = False 3039 3040 if cross_apply is not None: 3041 this = self._parse_select(table=True) 3042 view = None 3043 outer = None 3044 elif self._match(TokenType.LATERAL): 3045 this = self._parse_select(table=True) 3046 view = self._match(TokenType.VIEW) 3047 outer = self._match(TokenType.OUTER) 3048 else: 3049 return None 3050 3051 if not this: 3052 this = ( 3053 self._parse_unnest() 3054 or self._parse_function() 3055 or self._parse_id_var(any_token=False) 3056 ) 3057 3058 while self._match(TokenType.DOT): 3059 this = exp.Dot( 3060 this=this, 3061 expression=self._parse_function() or self._parse_id_var(any_token=False), 3062 ) 3063 3064 if view: 3065 table = self._parse_id_var(any_token=False) 3066 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3067 table_alias: t.Optional[exp.TableAlias] = self.expression( 3068 exp.TableAlias, this=table, columns=columns 3069 ) 3070 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3071 # We move the alias from the lateral's child node to 
the lateral itself 3072 table_alias = this.args["alias"].pop() 3073 else: 3074 table_alias = self._parse_table_alias() 3075 3076 return self.expression( 3077 exp.Lateral, 3078 this=this, 3079 view=view, 3080 outer=outer, 3081 alias=table_alias, 3082 cross_apply=cross_apply, 3083 ) 3084 3085 def _parse_join_parts( 3086 self, 3087 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3088 return ( 3089 self._match_set(self.JOIN_METHODS) and self._prev, 3090 self._match_set(self.JOIN_SIDES) and self._prev, 3091 self._match_set(self.JOIN_KINDS) and self._prev, 3092 ) 3093 3094 def _parse_join( 3095 self, skip_join_token: bool = False, parse_bracket: bool = False 3096 ) -> t.Optional[exp.Join]: 3097 if self._match(TokenType.COMMA): 3098 return self.expression(exp.Join, this=self._parse_table()) 3099 3100 index = self._index 3101 method, side, kind = self._parse_join_parts() 3102 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3103 join = self._match(TokenType.JOIN) 3104 3105 if not skip_join_token and not join: 3106 self._retreat(index) 3107 kind = None 3108 method = None 3109 side = None 3110 3111 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3112 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3113 3114 if not skip_join_token and not join and not outer_apply and not cross_apply: 3115 return None 3116 3117 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3118 3119 if method: 3120 kwargs["method"] = method.text 3121 if side: 3122 kwargs["side"] = side.text 3123 if kind: 3124 kwargs["kind"] = kind.text 3125 if hint: 3126 kwargs["hint"] = hint 3127 3128 if self._match(TokenType.MATCH_CONDITION): 3129 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3130 3131 if self._match(TokenType.ON): 3132 kwargs["on"] = self._parse_assignment() 3133 elif self._match(TokenType.USING): 3134 kwargs["using"] = self._parse_wrapped_id_vars() 3135 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3136 kind and kind.token_type == TokenType.CROSS 3137 ): 3138 index = self._index 3139 joins: t.Optional[list] = list(self._parse_joins()) 3140 3141 if joins and self._match(TokenType.ON): 3142 kwargs["on"] = self._parse_assignment() 3143 elif joins and self._match(TokenType.USING): 3144 kwargs["using"] = self._parse_wrapped_id_vars() 3145 else: 3146 joins = None 3147 self._retreat(index) 3148 3149 kwargs["this"].set("joins", joins if joins else None) 3150 3151 comments = [c for token in (method, side, kind) if token for c in token.comments] 3152 return self.expression(exp.Join, comments=comments, **kwargs) 3153 3154 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3155 this = self._parse_assignment() 3156 3157 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3158 return this 3159 3160 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3161 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3162 3163 return this 3164 3165 def _parse_index_params(self) -> exp.IndexParameters: 3166 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3167 3168 if self._match(TokenType.L_PAREN, advance=False): 3169 columns = self._parse_wrapped_csv(self._parse_with_operator) 3170 else: 3171 columns = None 3172 3173 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3174 partition_by = self._parse_partition_by() 3175 with_storage = 
self._match(TokenType.WITH) and self._parse_wrapped_properties() 3176 tablespace = ( 3177 self._parse_var(any_token=True) 3178 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3179 else None 3180 ) 3181 where = self._parse_where() 3182 3183 return self.expression( 3184 exp.IndexParameters, 3185 using=using, 3186 columns=columns, 3187 include=include, 3188 partition_by=partition_by, 3189 where=where, 3190 with_storage=with_storage, 3191 tablespace=tablespace, 3192 ) 3193 3194 def _parse_index( 3195 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3196 ) -> t.Optional[exp.Index]: 3197 if index or anonymous: 3198 unique = None 3199 primary = None 3200 amp = None 3201 3202 self._match(TokenType.ON) 3203 self._match(TokenType.TABLE) # hive 3204 table = self._parse_table_parts(schema=True) 3205 else: 3206 unique = self._match(TokenType.UNIQUE) 3207 primary = self._match_text_seq("PRIMARY") 3208 amp = self._match_text_seq("AMP") 3209 3210 if not self._match(TokenType.INDEX): 3211 return None 3212 3213 index = self._parse_id_var() 3214 table = None 3215 3216 params = self._parse_index_params() 3217 3218 return self.expression( 3219 exp.Index, 3220 this=index, 3221 table=table, 3222 unique=unique, 3223 primary=primary, 3224 amp=amp, 3225 params=params, 3226 ) 3227 3228 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3229 hints: t.List[exp.Expression] = [] 3230 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3231 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3232 hints.append( 3233 self.expression( 3234 exp.WithTableHint, 3235 expressions=self._parse_csv( 3236 lambda: self._parse_function() or self._parse_var(any_token=True) 3237 ), 3238 ) 3239 ) 3240 self._match_r_paren() 3241 else: 3242 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3243 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3244 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3245 3246 self._match_texts(("INDEX", "KEY")) 3247 if self._match(TokenType.FOR): 3248 hint.set("target", self._advance_any() and self._prev.text.upper()) 3249 3250 hint.set("expressions", self._parse_wrapped_id_vars()) 3251 hints.append(hint) 3252 3253 return hints or None 3254 3255 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3256 return ( 3257 (not schema and self._parse_function(optional_parens=False)) 3258 or self._parse_id_var(any_token=False) 3259 or self._parse_string_as_identifier() 3260 or self._parse_placeholder() 3261 ) 3262 3263 def _parse_table_parts( 3264 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3265 ) -> exp.Table: 3266 catalog = None 3267 db = None 3268 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3269 3270 while self._match(TokenType.DOT): 3271 if catalog: 3272 # This allows nesting the table in arbitrarily many dot expressions if needed 3273 table = self.expression( 3274 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3275 ) 3276 else: 3277 catalog = db 3278 db = table 3279 # "" used for tsql FROM a..b case 3280 table = self._parse_table_part(schema=schema) or "" 3281 3282 if ( 3283 wildcard 3284 and self._is_connected() 3285 and (isinstance(table, exp.Identifier) or not table) 3286 and self._match(TokenType.STAR) 3287 ): 3288 if isinstance(table, exp.Identifier): 3289 table.args["this"] += "*" 3290 else: 3291 table = exp.Identifier(this="*") 3292 3293 # We bubble up comments 
from the Identifier to the Table 3294 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3295 3296 if is_db_reference: 3297 catalog = db 3298 db = table 3299 table = None 3300 3301 if not table and not is_db_reference: 3302 self.raise_error(f"Expected table name but got {self._curr}") 3303 if not db and is_db_reference: 3304 self.raise_error(f"Expected database name but got {self._curr}") 3305 3306 return self.expression( 3307 exp.Table, 3308 comments=comments, 3309 this=table, 3310 db=db, 3311 catalog=catalog, 3312 pivots=self._parse_pivots(), 3313 ) 3314 3315 def _parse_table( 3316 self, 3317 schema: bool = False, 3318 joins: bool = False, 3319 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3320 parse_bracket: bool = False, 3321 is_db_reference: bool = False, 3322 parse_partition: bool = False, 3323 ) -> t.Optional[exp.Expression]: 3324 lateral = self._parse_lateral() 3325 if lateral: 3326 return lateral 3327 3328 unnest = self._parse_unnest() 3329 if unnest: 3330 return unnest 3331 3332 values = self._parse_derived_table_values() 3333 if values: 3334 return values 3335 3336 subquery = self._parse_select(table=True) 3337 if subquery: 3338 if not subquery.args.get("pivots"): 3339 subquery.set("pivots", self._parse_pivots()) 3340 return subquery 3341 3342 bracket = parse_bracket and self._parse_bracket(None) 3343 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3344 3345 only = self._match(TokenType.ONLY) 3346 3347 this = t.cast( 3348 exp.Expression, 3349 bracket 3350 or self._parse_bracket( 3351 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3352 ), 3353 ) 3354 3355 if only: 3356 this.set("only", only) 3357 3358 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3359 self._match_text_seq("*") 3360 3361 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3362 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3363 this.set("partition", self._parse_partition()) 3364 3365 if schema: 3366 return self._parse_schema(this=this) 3367 3368 version = self._parse_version() 3369 3370 if version: 3371 this.set("version", version) 3372 3373 if self.dialect.ALIAS_POST_TABLESAMPLE: 3374 table_sample = self._parse_table_sample() 3375 3376 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3377 if alias: 3378 this.set("alias", alias) 3379 3380 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3381 return self.expression( 3382 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3383 ) 3384 3385 this.set("hints", self._parse_table_hints()) 3386 3387 if not this.args.get("pivots"): 3388 this.set("pivots", self._parse_pivots()) 3389 3390 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3391 table_sample = self._parse_table_sample() 3392 3393 if table_sample: 3394 table_sample.set("this", this) 3395 this = table_sample 3396 3397 if joins: 3398 for join in self._parse_joins(): 3399 this.append("joins", join) 3400 3401 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3402 this.set("ordinality", True) 3403 this.set("alias", self._parse_table_alias()) 3404 3405 return this 3406 3407 def _parse_version(self) -> t.Optional[exp.Version]: 3408 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3409 this = "TIMESTAMP" 3410 elif self._match(TokenType.VERSION_SNAPSHOT): 3411 this = "VERSION" 3412 else: 3413 return None 3414 3415 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 
3416 kind = self._prev.text.upper() 3417 start = self._parse_bitwise() 3418 self._match_texts(("TO", "AND")) 3419 end = self._parse_bitwise() 3420 expression: t.Optional[exp.Expression] = self.expression( 3421 exp.Tuple, expressions=[start, end] 3422 ) 3423 elif self._match_text_seq("CONTAINED", "IN"): 3424 kind = "CONTAINED IN" 3425 expression = self.expression( 3426 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3427 ) 3428 elif self._match(TokenType.ALL): 3429 kind = "ALL" 3430 expression = None 3431 else: 3432 self._match_text_seq("AS", "OF") 3433 kind = "AS OF" 3434 expression = self._parse_type() 3435 3436 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3437 3438 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3439 if not self._match(TokenType.UNNEST): 3440 return None 3441 3442 expressions = self._parse_wrapped_csv(self._parse_equality) 3443 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3444 3445 alias = self._parse_table_alias() if with_alias else None 3446 3447 if alias: 3448 if self.dialect.UNNEST_COLUMN_ONLY: 3449 if alias.args.get("columns"): 3450 self.raise_error("Unexpected extra column alias in unnest.") 3451 3452 alias.set("columns", [alias.this]) 3453 alias.set("this", None) 3454 3455 columns = alias.args.get("columns") or [] 3456 if offset and len(expressions) < len(columns): 3457 offset = columns.pop() 3458 3459 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3460 self._match(TokenType.ALIAS) 3461 offset = self._parse_id_var( 3462 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3463 ) or exp.to_identifier("offset") 3464 3465 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3466 3467 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3468 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3469 if not is_derived and not self._match_text_seq("VALUES"): 3470 return None 3471 3472 expressions = self._parse_csv(self._parse_value) 3473 alias = self._parse_table_alias() 3474 3475 if is_derived: 3476 self._match_r_paren() 3477 3478 return self.expression( 3479 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3480 ) 3481 3482 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3483 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3484 as_modifier and self._match_text_seq("USING", "SAMPLE") 3485 ): 3486 return None 3487 3488 bucket_numerator = None 3489 bucket_denominator = None 3490 bucket_field = None 3491 percent = None 3492 size = None 3493 seed = None 3494 3495 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3496 matched_l_paren = self._match(TokenType.L_PAREN) 3497 3498 if self.TABLESAMPLE_CSV: 3499 num = None 3500 expressions = self._parse_csv(self._parse_primary) 3501 else: 3502 expressions = None 3503 num = ( 3504 self._parse_factor() 3505 if self._match(TokenType.NUMBER, advance=False) 3506 else self._parse_primary() or self._parse_placeholder() 3507 ) 3508 3509 if self._match_text_seq("BUCKET"): 3510 bucket_numerator = self._parse_number() 3511 self._match_text_seq("OUT", "OF") 3512 bucket_denominator = self._parse_number() 3513 self._match(TokenType.ON) 3514 bucket_field = self._parse_field() 3515 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3516 percent = num 3517 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3518 size =
num 3519 else: 3520 percent = num 3521 3522 if matched_l_paren: 3523 self._match_r_paren() 3524 3525 if self._match(TokenType.L_PAREN): 3526 method = self._parse_var(upper=True) 3527 seed = self._match(TokenType.COMMA) and self._parse_number() 3528 self._match_r_paren() 3529 elif self._match_texts(("SEED", "REPEATABLE")): 3530 seed = self._parse_wrapped(self._parse_number) 3531 3532 if not method and self.DEFAULT_SAMPLING_METHOD: 3533 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3534 3535 return self.expression( 3536 exp.TableSample, 3537 expressions=expressions, 3538 method=method, 3539 bucket_numerator=bucket_numerator, 3540 bucket_denominator=bucket_denominator, 3541 bucket_field=bucket_field, 3542 percent=percent, 3543 size=size, 3544 seed=seed, 3545 ) 3546 3547 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3548 return list(iter(self._parse_pivot, None)) or None 3549 3550 def _parse_joins(self) -> t.Iterator[exp.Join]: 3551 return iter(self._parse_join, None) 3552 3553 # https://duckdb.org/docs/sql/statements/pivot 3554 def _parse_simplified_pivot(self) -> exp.Pivot: 3555 def _parse_on() -> t.Optional[exp.Expression]: 3556 this = self._parse_bitwise() 3557 return self._parse_in(this) if self._match(TokenType.IN) else this 3558 3559 this = self._parse_table() 3560 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3561 using = self._match(TokenType.USING) and self._parse_csv( 3562 lambda: self._parse_alias(self._parse_function()) 3563 ) 3564 group = self._parse_group() 3565 return self.expression( 3566 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3567 ) 3568 3569 def _parse_pivot_in(self) -> exp.In: 3570 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3571 this = self._parse_assignment() 3572 3573 self._match(TokenType.ALIAS) 3574 alias = self._parse_field() 3575 if alias: 3576 return self.expression(exp.PivotAlias, this=this, alias=alias) 3577 3578 return this 3579 3580 value = self._parse_column() 3581 3582 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3583 self.raise_error("Expecting IN (") 3584 3585 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3586 3587 self._match_r_paren() 3588 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3589 3590 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3591 index = self._index 3592 include_nulls = None 3593 3594 if self._match(TokenType.PIVOT): 3595 unpivot = False 3596 elif self._match(TokenType.UNPIVOT): 3597 unpivot = True 3598 3599 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3600 if self._match_text_seq("INCLUDE", "NULLS"): 3601 include_nulls = True 3602 elif self._match_text_seq("EXCLUDE", "NULLS"): 3603 include_nulls = False 3604 else: 3605 return None 3606 3607 expressions = [] 3608 3609 if not self._match(TokenType.L_PAREN): 3610 self._retreat(index) 3611 return None 3612 3613 if unpivot: 3614 expressions = self._parse_csv(self._parse_column) 3615 else: 3616 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3617 3618 if not expressions: 3619 self.raise_error("Failed to parse PIVOT's aggregation list") 3620 3621 if not self._match(TokenType.FOR): 3622 self.raise_error("Expecting FOR") 3623 3624 field = self._parse_pivot_in() 3625 3626 self._match_r_paren() 3627 3628 pivot = self.expression( 3629 exp.Pivot, 3630 expressions=expressions, 3631 field=field, 3632 unpivot=unpivot, 3633 include_nulls=include_nulls, 3634 ) 3635 3636 
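# A usage sketch (illustrative; it assumes the public `sqlglot.parse_one`
# entry point): the PIVOT grammar assembled above matches queries such as
# this Snowflake-style one:
#
#     import sqlglot
#     sql = "SELECT * FROM sales PIVOT(SUM(amount) FOR month IN ('JAN', 'FEB'))"
#     sqlglot.parse_one(sql, read="snowflake")
#
# The resulting exp.Pivot stores the aggregations in "expressions" and the
# FOR ... IN clause in "field"; the alias and the generated pivot column
# names are attached below.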
if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3637 pivot.set("alias", self._parse_table_alias()) 3638 3639 if not unpivot: 3640 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3641 3642 columns: t.List[exp.Expression] = [] 3643 for fld in pivot.args["field"].expressions: 3644 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3645 for name in names: 3646 if self.PREFIXED_PIVOT_COLUMNS: 3647 name = f"{name}_{field_name}" if name else field_name 3648 else: 3649 name = f"{field_name}_{name}" if name else field_name 3650 3651 columns.append(exp.to_identifier(name)) 3652 3653 pivot.set("columns", columns) 3654 3655 return pivot 3656 3657 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3658 return [agg.alias for agg in aggregations] 3659 3660 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3661 if not skip_where_token and not self._match(TokenType.PREWHERE): 3662 return None 3663 3664 return self.expression( 3665 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3666 ) 3667 3668 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3669 if not skip_where_token and not self._match(TokenType.WHERE): 3670 return None 3671 3672 return self.expression( 3673 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3674 ) 3675 3676 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3677 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3678 return None 3679 3680 elements: t.Dict[str, t.Any] = defaultdict(list) 3681 3682 if self._match(TokenType.ALL): 3683 elements["all"] = True 3684 elif self._match(TokenType.DISTINCT): 3685 elements["all"] = False 3686 3687 while True: 3688 expressions = self._parse_csv( 3689 lambda: None 3690 if self._match(TokenType.ROLLUP, advance=False) 3691 else self._parse_assignment() 3692 ) 3693 if expressions: 3694 elements["expressions"].extend(expressions) 3695 3696 grouping_sets = self._parse_grouping_sets() 3697 if grouping_sets: 3698 elements["grouping_sets"].extend(grouping_sets) 3699 3700 rollup = None 3701 cube = None 3702 totals = None 3703 3704 index = self._index 3705 with_ = self._match(TokenType.WITH) 3706 if self._match(TokenType.ROLLUP): 3707 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3708 elements["rollup"].extend(ensure_list(rollup)) 3709 3710 if self._match(TokenType.CUBE): 3711 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3712 elements["cube"].extend(ensure_list(cube)) 3713 3714 if self._match_text_seq("TOTALS"): 3715 totals = True 3716 elements["totals"] = True # type: ignore 3717 3718 if not (grouping_sets or rollup or cube or totals): 3719 if with_: 3720 self._retreat(index) 3721 break 3722 3723 return self.expression(exp.Group, **elements) # type: ignore 3724 3725 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3726 if not self._match(TokenType.GROUPING_SETS): 3727 return None 3728 3729 return self._parse_wrapped_csv(self._parse_grouping_set) 3730 3731 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3732 if self._match(TokenType.L_PAREN): 3733 grouping_set = self._parse_csv(self._parse_column) 3734 self._match_r_paren() 3735 return self.expression(exp.Tuple, expressions=grouping_set) 3736 3737 return self._parse_column() 3738 3739 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 
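# Illustrative sketch (not part of the original source; it assumes the
# public `sqlglot.parse_one` entry point): _parse_group above folds plain
# expressions, ROLLUP, CUBE, GROUPING SETS and WITH TOTALS into a single
# exp.Group node, and this method then parses the HAVING filter, e.g.:
#
#     import sqlglot
#     sql = "SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a) HAVING SUM(b) > 0"
#     select = sqlglot.parse_one(sql)
#     select.args["group"], select.args["having"]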

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
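
    # Illustrative sketch (assumption: the dialect flag defaults shown match your
    # sqlglot version): when NULLS FIRST/LAST is not written explicitly,
    # _parse_ordered derives nulls_first from dialect.NULL_ORDERING, so a
    # "nulls_are_small" dialect treats ascending NULLs as smallest.
    #
    #   >>> import sqlglot
    #   >>> order = sqlglot.parse_one("SELECT x FROM t ORDER BY x").args["order"]
    #   >>> order.expressions[0].args.get("nulls_first")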

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
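
    # Illustrative sketch (not part of the parser): a comma inside LIMIT is read
    # as the MySQL-style "LIMIT offset, count" form, so the first term becomes the
    # offset. The output below is an assumption and may vary by sqlglot version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT x FROM t LIMIT 5, 10", read="mysql", write="postgres")[0]
    #   'SELECT x FROM t LIMIT 10 OFFSET 5'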

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
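
    # Illustrative sketch (not part of the parser): a bare UNION defaults to
    # distinct=True because neither DISTINCT nor ALL was matched.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT 1 UNION SELECT 2").args.get("distinct")
    #   True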

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
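
    # Illustrative sketch (not part of the parser): the Postgres shorthands parse
    # into the same Is/Not nodes as the verbose forms, e.g. "x NOTNULL" becomes
    # NOT (x IS NULL). The round-trip output is an assumption and may vary by
    # sqlglot version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM t WHERE x NOTNULL", read="postgres").sql()
    #   'SELECT * FROM t WHERE NOT x IS NULL'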

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
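
    # Illustrative sketch (not part of the parser): interval literals are
    # canonicalized into the INTERVAL '<value>' <UNIT> form described above.
    # The exact output is an assumption and may vary by sqlglot version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT INTERVAL '5 days'").sql()
    #   "SELECT INTERVAL '5' DAYS"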

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
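
    # Illustrative sketch (not part of the parser): when a type keyword is
    # directly followed by a literal, _parse_type produces a cast, so
    # "DATE '2020-01-01'" parses roughly as CAST('2020-01-01' AS DATE), unless a
    # TYPE_LITERAL_PARSERS entry builds something more specific. The AST shape
    # may vary by sqlglot version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT DATE '2020-01-01'").selects[0]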

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
self._match_text_seq("UNSIGNED"): 4427 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4428 if not unsigned_type_token: 4429 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4430 4431 type_token = unsigned_type_token or type_token 4432 4433 this = exp.DataType( 4434 this=exp.DataType.Type[type_token.value], 4435 expressions=expressions, 4436 nested=nested, 4437 values=values, 4438 prefix=prefix, 4439 ) 4440 elif expressions: 4441 this.set("expressions", expressions) 4442 4443 # https://materialize.com/docs/sql/types/list/#type-name 4444 while self._match(TokenType.LIST): 4445 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4446 4447 index = self._index 4448 4449 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4450 matched_array = self._match(TokenType.ARRAY) 4451 4452 while self._curr: 4453 matched_l_bracket = self._match(TokenType.L_BRACKET) 4454 if not matched_l_bracket and not matched_array: 4455 break 4456 4457 matched_array = False 4458 values = self._parse_csv(self._parse_assignment) or None 4459 if values and not schema: 4460 self._retreat(index) 4461 break 4462 4463 this = exp.DataType( 4464 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4465 ) 4466 self._match(TokenType.R_BRACKET) 4467 4468 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4469 converter = self.TYPE_CONVERTER.get(this.this) 4470 if converter: 4471 this = converter(t.cast(exp.DataType, this)) 4472 4473 return this 4474 4475 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4476 index = self._index 4477 this = ( 4478 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4479 or self._parse_id_var() 4480 ) 4481 self._match(TokenType.COLON) 4482 4483 if ( 4484 type_required 4485 and not isinstance(this, exp.DataType) 4486 and not self._match_set(self.TYPE_TOKENS, advance=False) 4487 ): 4488 self._retreat(index) 4489 return self._parse_types() 4490 4491 return self._parse_column_def(this) 4492 4493 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4494 if not self._match_text_seq("AT", "TIME", "ZONE"): 4495 return this 4496 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4497 4498 def _parse_column(self) -> t.Optional[exp.Expression]: 4499 this = self._parse_column_reference() 4500 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4501 4502 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4503 this = self._parse_field() 4504 if ( 4505 not this 4506 and self._match(TokenType.VALUES, advance=False) 4507 and self.VALUES_FOLLOWED_BY_PAREN 4508 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4509 ): 4510 this = self._parse_id_var() 4511 4512 if isinstance(this, exp.Identifier): 4513 # We bubble up comments from the Identifier to the Column 4514 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4515 4516 return this 4517 4518 def _parse_colon_as_json_extract( 4519 self, this: t.Optional[exp.Expression] 4520 ) -> t.Optional[exp.Expression]: 4521 casts = [] 4522 json_path = [] 4523 4524 while self._match(TokenType.COLON): 4525 start_index = self._index 4526 path = self._parse_column_ops(self._parse_field(any_token=True)) 4527 4528 # The cast :: operator has a lower precedence than the extraction operator :, so 4529 # we rearrange the AST appropriately to avoid casting the 

    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index
            path = self._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # BigQuery allows function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this
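
    # Illustrative sketch (not part of the parser): in dialects where
    # COLON_IS_JSON_EXTRACT is set (e.g. Snowflake), "col:a.b::int" is rearranged
    # so the cast applies to the extracted value, i.e. roughly
    # CAST(<json extract of col at $.a.b> AS INT). AST details vary by version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT col:a.b::int FROM t", read="snowflake")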

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
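
    # Illustrative sketch (not part of the parser): the ODBC-style {fn ...}
    # escape wrapper is consumed and only the inner function call survives in the
    # AST. The output below is an assumption and may vary by sqlglot version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}").sql()
    #   "SELECT CONCAT('a', 'b')"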

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
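
    # Illustrative sketch (not part of the parser): LAMBDAS keys on the arrow
    # token, so "x -> x + 1" inside a higher-order function parses into an
    # exp.Lambda with one argument. Dialect and AST details may vary by version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT TRANSFORM(xs, x -> x + 1)", read="spark")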

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
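
    # Illustrative sketch (not part of the parser): a column definition is an
    # identifier plus a type and a list of constraint nodes, so "x INT NOT NULL"
    # yields a ColumnDef whose constraints include a NotNullColumnConstraint.
    #
    #   >>> import sqlglot
    #   >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)")
    #   >>> ddl.this.expressions[0]  # the ColumnDef for x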

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints
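
    # Illustrative sketch (not part of the parser): the IDENTITY options fill in
    # the constraint's args, e.g. START WITH / INCREMENT BY below set the "start"
    # and "increment" args. Exact AST details may vary by sqlglot version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    #   ... )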

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
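
    # Illustrative sketch (not part of the parser): ON DELETE / ON UPDATE actions
    # are stored as plain strings on the ForeignKey node, keyed by "delete" and
    # "update".
    #
    #   >>> import sqlglot
    #   >>> fk = sqlglot.parse_one(
    #   ...     "CREATE TABLE c (id INT, FOREIGN KEY (id) REFERENCES p (id) ON DELETE CASCADE)"
    #   ... ).find(sqlglot.exp.ForeignKey)
    #   >>> fk.args.get("delete")
    #   'CASCADE'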

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = self.expression(exp.Array, expressions=expressions)
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return self.expression(constructor_type, expressions=expressions)

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
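
    # Illustrative sketch (not part of the parser): each WHEN/THEN pair becomes
    # an exp.If under Case.ifs, and ELSE lands in Case.default.
    #
    #   >>> import sqlglot
    #   >>> case = sqlglot.parse_one("SELECT CASE WHEN a THEN 1 ELSE 2 END FROM t").selects[0]
    #   >>> len(case.args["ifs"]), case.args["default"] is not None
    #   (1, True)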

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
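
    # Illustrative sketch (not part of the parser): CAST with a FORMAT clause on
    # a temporal type is rewritten into StrToDate/StrToTime using the dialect's
    # time-format mappings. Teradata-style input shown; the resulting AST and SQL
    # are assumptions and may vary by sqlglot version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD')", read="teradata")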

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
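
    # Illustrative sketch (not part of the parser): both the Postgres form and
    # the WITHIN GROUP form collapse into exp.GroupConcat, which is what makes
    # the MySQL GROUP_CONCAT transpilation straightforward. The output below is
    # an assumption and may vary by sqlglot version.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile(
    #   ...     "SELECT STRING_AGG(x, ',' ORDER BY x) FROM t", read="postgres", write="mysql"
    #   ... )[0]
    #   "SELECT GROUP_CONCAT(x ORDER BY x SEPARATOR ',') FROM t"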
5425 """ 5426 args = self._parse_csv(self._parse_assignment) 5427 5428 if len(args) < 3: 5429 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5430 5431 expression, *expressions = args 5432 if not expression: 5433 return None 5434 5435 ifs = [] 5436 for search, result in zip(expressions[::2], expressions[1::2]): 5437 if not search or not result: 5438 return None 5439 5440 if isinstance(search, exp.Literal): 5441 ifs.append( 5442 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5443 ) 5444 elif isinstance(search, exp.Null): 5445 ifs.append( 5446 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5447 ) 5448 else: 5449 cond = exp.or_( 5450 exp.EQ(this=expression.copy(), expression=search), 5451 exp.and_( 5452 exp.Is(this=expression.copy(), expression=exp.Null()), 5453 exp.Is(this=search.copy(), expression=exp.Null()), 5454 copy=False, 5455 ), 5456 copy=False, 5457 ) 5458 ifs.append(exp.If(this=cond, true=result)) 5459 5460 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5461 5462 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5463 self._match_text_seq("KEY") 5464 key = self._parse_column() 5465 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5466 self._match_text_seq("VALUE") 5467 value = self._parse_bitwise() 5468 5469 if not key and not value: 5470 return None 5471 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5472 5473 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5474 if not this or not self._match_text_seq("FORMAT", "JSON"): 5475 return this 5476 5477 return self.expression(exp.FormatJson, this=this) 5478 5479 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5480 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5481 for value in values: 5482 if self._match_text_seq(value, "ON", on): 5483 return f"{value} ON {on}" 5484 5485 return None 5486 5487 @t.overload 5488 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5489 5490 @t.overload 5491 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5492 5493 def _parse_json_object(self, agg=False): 5494 star = self._parse_star() 5495 expressions = ( 5496 [star] 5497 if star 5498 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5499 ) 5500 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5501 5502 unique_keys = None 5503 if self._match_text_seq("WITH", "UNIQUE"): 5504 unique_keys = True 5505 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5506 unique_keys = False 5507 5508 self._match_text_seq("KEYS") 5509 5510 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5511 self._parse_type() 5512 ) 5513 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5514 5515 return self.expression( 5516 exp.JSONObjectAgg if agg else exp.JSONObject, 5517 expressions=expressions, 5518 null_handling=null_handling, 5519 unique_keys=unique_keys, 5520 return_type=return_type, 5521 encoding=encoding, 5522 ) 5523 5524 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5525 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5526 if not self._match_text_seq("NESTED"): 5527 this = self._parse_id_var() 5528 kind = self._parse_types(allow_identifiers=False) 5529 nested = None 5530 else: 5531 this = None 5532 kind = None 5533 nested = True 5534 5535 path = self._match_text_seq("PATH") and self._parse_string() 5536 nested_schema = nested and self._parse_json_schema() 5537 5538 return self.expression( 5539 exp.JSONColumnDef, 5540 this=this, 5541 kind=kind, 5542 path=path, 5543 nested_schema=nested_schema, 5544 ) 5545 5546 def _parse_json_schema(self) -> exp.JSONSchema: 5547 self._match_text_seq("COLUMNS") 5548 return self.expression( 5549 exp.JSONSchema, 5550 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5551 ) 5552 5553 def _parse_json_table(self) -> exp.JSONTable: 5554 this = self._parse_format_json(self._parse_bitwise()) 5555 path = self._match(TokenType.COMMA) and self._parse_string() 5556 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5557 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5558 schema = self._parse_json_schema() 5559 5560 return exp.JSONTable( 5561 this=this, 5562 schema=schema, 5563 path=path, 5564 error_handling=error_handling, 5565 empty_handling=empty_handling, 5566 ) 5567 5568 def _parse_match_against(self) -> exp.MatchAgainst: 5569 expressions = self._parse_csv(self._parse_column) 5570 5571 self._match_text_seq(")", "AGAINST", "(") 5572 5573 this = self._parse_string() 5574 5575 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5576 modifier = "IN NATURAL LANGUAGE MODE" 5577 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5578 modifier = f"{modifier} WITH QUERY EXPANSION" 5579 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5580 modifier = "IN BOOLEAN MODE" 5581 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5582 modifier = "WITH QUERY EXPANSION" 5583 else: 5584 modifier = None 5585 5586 return self.expression( 5587 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5588 ) 5589 5590 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5591 def _parse_open_json(self) -> exp.OpenJSON: 5592 this = self._parse_bitwise() 5593 path = self._match(TokenType.COMMA) and self._parse_string() 5594 5595 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5596 this = self._parse_field(any_token=True) 5597 kind = self._parse_types() 5598 path = 
self._parse_string() 5599 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5600 5601 return self.expression( 5602 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5603 ) 5604 5605 expressions = None 5606 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5607 self._match_l_paren() 5608 expressions = self._parse_csv(_parse_open_json_column_def) 5609 5610 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5611 5612 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5613 args = self._parse_csv(self._parse_bitwise) 5614 5615 if self._match(TokenType.IN): 5616 return self.expression( 5617 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5618 ) 5619 5620 if haystack_first: 5621 haystack = seq_get(args, 0) 5622 needle = seq_get(args, 1) 5623 else: 5624 needle = seq_get(args, 0) 5625 haystack = seq_get(args, 1) 5626 5627 return self.expression( 5628 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5629 ) 5630 5631 def _parse_predict(self) -> exp.Predict: 5632 self._match_text_seq("MODEL") 5633 this = self._parse_table() 5634 5635 self._match(TokenType.COMMA) 5636 self._match_text_seq("TABLE") 5637 5638 return self.expression( 5639 exp.Predict, 5640 this=this, 5641 expression=self._parse_table(), 5642 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5643 ) 5644 5645 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5646 args = self._parse_csv(self._parse_table) 5647 return exp.JoinHint(this=func_name.upper(), expressions=args) 5648 5649 def _parse_substring(self) -> exp.Substring: 5650 # Postgres supports the form: substring(string [from int] [for int]) 5651 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5652 5653 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5654 5655 if self._match(TokenType.FROM): 5656 args.append(self._parse_bitwise()) 5657 if self._match(TokenType.FOR): 5658 if len(args) == 1: 5659 args.append(exp.Literal.number(1)) 5660 args.append(self._parse_bitwise()) 5661 5662 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5663 5664 def _parse_trim(self) -> exp.Trim: 5665 # https://www.w3resource.com/sql/character-functions/trim.php 5666 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5667 5668 position = None 5669 collation = None 5670 expression = None 5671 5672 if self._match_texts(self.TRIM_TYPES): 5673 position = self._prev.text.upper() 5674 5675 this = self._parse_bitwise() 5676 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5677 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5678 expression = self._parse_bitwise() 5679 5680 if invert_order: 5681 this, expression = expression, this 5682 5683 if self._match(TokenType.COLLATE): 5684 collation = self._parse_bitwise() 5685 5686 return self.expression( 5687 exp.Trim, this=this, position=position, expression=expression, collation=collation 5688 ) 5689 5690 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5691 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5692 5693 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5694 return self._parse_window(self._parse_id_var(), alias=True) 5695 5696 def _parse_respect_or_ignore_nulls( 5697 self, this: t.Optional[exp.Expression] 5698 ) -> t.Optional[exp.Expression]: 5699 if self._match_text_seq("IGNORE", "NULLS"): 
5700 return self.expression(exp.IgnoreNulls, this=this) 5701 if self._match_text_seq("RESPECT", "NULLS"): 5702 return self.expression(exp.RespectNulls, this=this) 5703 return this 5704 5705 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5706 if self._match(TokenType.HAVING): 5707 self._match_texts(("MAX", "MIN")) 5708 max = self._prev.text.upper() != "MIN" 5709 return self.expression( 5710 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5711 ) 5712 5713 return this 5714 5715 def _parse_window( 5716 self, this: t.Optional[exp.Expression], alias: bool = False 5717 ) -> t.Optional[exp.Expression]: 5718 func = this 5719 comments = func.comments if isinstance(func, exp.Expression) else None 5720 5721 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5722 self._match(TokenType.WHERE) 5723 this = self.expression( 5724 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5725 ) 5726 self._match_r_paren() 5727 5728 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5729 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5730 if self._match_text_seq("WITHIN", "GROUP"): 5731 order = self._parse_wrapped(self._parse_order) 5732 this = self.expression(exp.WithinGroup, this=this, expression=order) 5733 5734 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5735 # Some dialects choose to implement it and some do not. 5736 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5737 5738 # There is some code above in _parse_lambda that handles 5739 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5740 5741 # The code below handles 5742 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5743 5744 # Oracle allows both formats 5745 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5746 # and Snowflake chose to do the same for familiarity 5747 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5748 if isinstance(this, exp.AggFunc): 5749 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5750 5751 if ignore_respect and ignore_respect is not this: 5752 ignore_respect.replace(ignore_respect.this) 5753 this = self.expression(ignore_respect.__class__, this=this) 5754 5755 this = self._parse_respect_or_ignore_nulls(this) 5756 5757 # bigquery select from window x AS (partition by ...)
5758 if alias: 5759 over = None 5760 self._match(TokenType.ALIAS) 5761 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5762 return this 5763 else: 5764 over = self._prev.text.upper() 5765 5766 if comments and isinstance(func, exp.Expression): 5767 func.pop_comments() 5768 5769 if not self._match(TokenType.L_PAREN): 5770 return self.expression( 5771 exp.Window, 5772 comments=comments, 5773 this=this, 5774 alias=self._parse_id_var(False), 5775 over=over, 5776 ) 5777 5778 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5779 5780 first = self._match(TokenType.FIRST) 5781 if self._match_text_seq("LAST"): 5782 first = False 5783 5784 partition, order = self._parse_partition_and_order() 5785 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5786 5787 if kind: 5788 self._match(TokenType.BETWEEN) 5789 start = self._parse_window_spec() 5790 self._match(TokenType.AND) 5791 end = self._parse_window_spec() 5792 5793 spec = self.expression( 5794 exp.WindowSpec, 5795 kind=kind, 5796 start=start["value"], 5797 start_side=start["side"], 5798 end=end["value"], 5799 end_side=end["side"], 5800 ) 5801 else: 5802 spec = None 5803 5804 self._match_r_paren() 5805 5806 window = self.expression( 5807 exp.Window, 5808 comments=comments, 5809 this=this, 5810 partition_by=partition, 5811 order=order, 5812 spec=spec, 5813 alias=window_alias, 5814 over=over, 5815 first=first, 5816 ) 5817 5818 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5819 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5820 return self._parse_window(window, alias=alias) 5821 5822 return window 5823 5824 def _parse_partition_and_order( 5825 self, 5826 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5827 return self._parse_partition_by(), self._parse_order() 5828 5829 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5830 self._match(TokenType.BETWEEN) 5831 5832 return { 5833 "value": ( 5834 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5835 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5836 or self._parse_bitwise() 5837 ), 5838 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5839 } 5840 5841 def _parse_alias( 5842 self, this: t.Optional[exp.Expression], explicit: bool = False 5843 ) -> t.Optional[exp.Expression]: 5844 any_token = self._match(TokenType.ALIAS) 5845 comments = self._prev_comments or [] 5846 5847 if explicit and not any_token: 5848 return this 5849 5850 if self._match(TokenType.L_PAREN): 5851 aliases = self.expression( 5852 exp.Aliases, 5853 comments=comments, 5854 this=this, 5855 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5856 ) 5857 self._match_r_paren(aliases) 5858 return aliases 5859 5860 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5861 self.STRING_ALIASES and self._parse_string_as_identifier() 5862 ) 5863 5864 if alias: 5865 comments.extend(alias.pop_comments()) 5866 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5867 column = this.this 5868 5869 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5870 if not this.comments and column and column.comments: 5871 this.comments = column.pop_comments() 5872 5873 return this 5874 5875 def _parse_id_var( 5876 self, 5877 any_token: bool = True, 5878 tokens: t.Optional[t.Collection[TokenType]] = None, 5879 ) -> t.Optional[exp.Expression]: 5880 expression = self._parse_identifier() 5881 if 
not expression and ( 5882 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5883 ): 5884 quoted = self._prev.token_type == TokenType.STRING 5885 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5886 5887 return expression 5888 5889 def _parse_string(self) -> t.Optional[exp.Expression]: 5890 if self._match_set(self.STRING_PARSERS): 5891 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5892 return self._parse_placeholder() 5893 5894 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5895 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5896 5897 def _parse_number(self) -> t.Optional[exp.Expression]: 5898 if self._match_set(self.NUMERIC_PARSERS): 5899 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5900 return self._parse_placeholder() 5901 5902 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5903 if self._match(TokenType.IDENTIFIER): 5904 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5905 return self._parse_placeholder() 5906 5907 def _parse_var( 5908 self, 5909 any_token: bool = False, 5910 tokens: t.Optional[t.Collection[TokenType]] = None, 5911 upper: bool = False, 5912 ) -> t.Optional[exp.Expression]: 5913 if ( 5914 (any_token and self._advance_any()) 5915 or self._match(TokenType.VAR) 5916 or (self._match_set(tokens) if tokens else False) 5917 ): 5918 return self.expression( 5919 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5920 ) 5921 return self._parse_placeholder() 5922 5923 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5924 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5925 self._advance() 5926 return self._prev 5927 return None 5928 5929 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5930 return self._parse_var() or self._parse_string() 5931 5932 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5933 return self._parse_primary() or self._parse_var(any_token=True) 5934 5935 def _parse_null(self) -> t.Optional[exp.Expression]: 5936 if self._match_set(self.NULL_TOKENS): 5937 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5938 return self._parse_placeholder() 5939 5940 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5941 if self._match(TokenType.TRUE): 5942 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5943 if self._match(TokenType.FALSE): 5944 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5945 return self._parse_placeholder() 5946 5947 def _parse_star(self) -> t.Optional[exp.Expression]: 5948 if self._match(TokenType.STAR): 5949 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5950 return self._parse_placeholder() 5951 5952 def _parse_parameter(self) -> exp.Parameter: 5953 this = self._parse_identifier() or self._parse_primary_or_var() 5954 return self.expression(exp.Parameter, this=this) 5955 5956 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5957 if self._match_set(self.PLACEHOLDER_PARSERS): 5958 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5959 if placeholder: 5960 return placeholder 5961 self._advance(-1) 5962 return None 5963 5964 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5965 if not self._match_texts(keywords): 5966 return None 5967 if self._match(TokenType.L_PAREN, advance=False): 5968 return 
self._parse_wrapped_csv(self._parse_expression) 5969 5970 expression = self._parse_expression() 5971 return [expression] if expression else None 5972 5973 def _parse_csv( 5974 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5975 ) -> t.List[exp.Expression]: 5976 parse_result = parse_method() 5977 items = [parse_result] if parse_result is not None else [] 5978 5979 while self._match(sep): 5980 self._add_comments(parse_result) 5981 parse_result = parse_method() 5982 if parse_result is not None: 5983 items.append(parse_result) 5984 5985 return items 5986 5987 def _parse_tokens( 5988 self, parse_method: t.Callable, expressions: t.Dict 5989 ) -> t.Optional[exp.Expression]: 5990 this = parse_method() 5991 5992 while self._match_set(expressions): 5993 this = self.expression( 5994 expressions[self._prev.token_type], 5995 this=this, 5996 comments=self._prev_comments, 5997 expression=parse_method(), 5998 ) 5999 6000 return this 6001 6002 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6003 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6004 6005 def _parse_wrapped_csv( 6006 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6007 ) -> t.List[exp.Expression]: 6008 return self._parse_wrapped( 6009 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6010 ) 6011 6012 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6013 wrapped = self._match(TokenType.L_PAREN) 6014 if not wrapped and not optional: 6015 self.raise_error("Expecting (") 6016 parse_result = parse_method() 6017 if wrapped: 6018 self._match_r_paren() 6019 return parse_result 6020 6021 def _parse_expressions(self) -> t.List[exp.Expression]: 6022 return self._parse_csv(self._parse_expression) 6023 6024 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6025 return self._parse_select() or self._parse_set_operations( 6026 self._parse_expression() if alias else self._parse_assignment() 6027 ) 6028 6029 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6030 return self._parse_query_modifiers( 6031 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6032 ) 6033 6034 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6035 this = None 6036 if self._match_texts(self.TRANSACTION_KIND): 6037 this = self._prev.text 6038 6039 self._match_texts(("TRANSACTION", "WORK")) 6040 6041 modes = [] 6042 while True: 6043 mode = [] 6044 while self._match(TokenType.VAR): 6045 mode.append(self._prev.text) 6046 6047 if mode: 6048 modes.append(" ".join(mode)) 6049 if not self._match(TokenType.COMMA): 6050 break 6051 6052 return self.expression(exp.Transaction, this=this, modes=modes) 6053 6054 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6055 chain = None 6056 savepoint = None 6057 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6058 6059 self._match_texts(("TRANSACTION", "WORK")) 6060 6061 if self._match_text_seq("TO"): 6062 self._match_text_seq("SAVEPOINT") 6063 savepoint = self._parse_id_var() 6064 6065 if self._match(TokenType.AND): 6066 chain = not self._match_text_seq("NO") 6067 self._match_text_seq("CHAIN") 6068 6069 if is_rollback: 6070 return self.expression(exp.Rollback, savepoint=savepoint) 6071 6072 return self.expression(exp.Commit, chain=chain) 6073 6074 def _parse_refresh(self) -> exp.Refresh: 6075 self._match(TokenType.TABLE) 6076 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6077 6078 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6079 if not self._match_text_seq("ADD"): 6080 return None 6081 6082 self._match(TokenType.COLUMN) 6083 exists_column = self._parse_exists(not_=True) 6084 expression = self._parse_field_def() 6085 6086 if expression: 6087 expression.set("exists", exists_column) 6088 6089 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6090 if self._match_texts(("FIRST", "AFTER")): 6091 position = self._prev.text 6092 column_position = self.expression( 6093 exp.ColumnPosition, this=self._parse_column(), position=position 6094 ) 6095 expression.set("position", column_position) 6096 6097 return expression 6098 6099 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6100 drop = self._match(TokenType.DROP) and self._parse_drop() 6101 if drop and not isinstance(drop, exp.Command): 6102 drop.set("kind", drop.args.get("kind", "COLUMN")) 6103 return drop 6104 6105 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6106 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6107 return self.expression( 6108 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6109 ) 6110 6111 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6112 index = self._index - 1 6113 6114 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6115 return self._parse_csv( 6116 lambda: self.expression( 6117 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6118 ) 6119 ) 6120 6121 self._retreat(index) 6122 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6123 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6124 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6125 6126 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6127 if self._match_texts(self.ALTER_ALTER_PARSERS): 6128 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6129 6130 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6131 # keyword after ALTER we default to parsing this statement 6132 self._match(TokenType.COLUMN) 6133 column = self._parse_field(any_token=True) 6134 6135 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6136 return self.expression(exp.AlterColumn, this=column, drop=True) 6137 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6138 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6139 if self._match(TokenType.COMMENT): 6140 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6141 if self._match_text_seq("DROP", "NOT", "NULL"): 6142 return self.expression( 6143 exp.AlterColumn, 6144 this=column, 6145 drop=True, 6146 allow_null=True, 6147 ) 6148 if self._match_text_seq("SET", "NOT", "NULL"): 6149 return self.expression( 6150 exp.AlterColumn, 6151 this=column, 6152 allow_null=False, 6153 ) 6154 self._match_text_seq("SET", "DATA") 6155 self._match_text_seq("TYPE") 6156 return self.expression( 6157 exp.AlterColumn, 6158 this=column, 6159 dtype=self._parse_types(), 6160 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6161 using=self._match(TokenType.USING) and self._parse_assignment(), 6162 ) 6163 6164 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6165 if self._match_texts(("ALL", "EVEN", "AUTO")): 6166 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6167 6168 self._match_text_seq("KEY", "DISTKEY") 6169 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6170 6171 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6172 if compound: 6173 self._match_text_seq("SORTKEY") 6174 6175 if self._match(TokenType.L_PAREN, advance=False): 6176 return self.expression( 6177 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6178 ) 6179 6180 self._match_texts(("AUTO", "NONE")) 6181 return self.expression( 6182 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6183 ) 6184 6185 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6186 index = self._index - 1 6187 6188 partition_exists = self._parse_exists() 6189 if self._match(TokenType.PARTITION, advance=False): 6190 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6191 6192 self._retreat(index) 6193 return self._parse_csv(self._parse_drop_column) 6194 6195 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6196 if self._match(TokenType.COLUMN): 6197 exists = self._parse_exists() 6198 old_column = self._parse_column() 6199 to = self._match_text_seq("TO") 6200 new_column = self._parse_column() 6201 6202 if old_column is None or to is None or new_column is None: 6203 return None 6204 6205 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6206 6207 self._match_text_seq("TO") 6208 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6209 6210 def _parse_alter_table_set(self) -> exp.AlterSet: 6211 alter_set = self.expression(exp.AlterSet) 6212 6213 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6214 "TABLE", "PROPERTIES" 6215 ): 6216 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6217 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6218 alter_set.set("expressions", [self._parse_assignment()]) 6219 elif self._match_texts(("LOGGED", "UNLOGGED")): 6220 alter_set.set("option", exp.var(self._prev.text.upper())) 6221 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6222 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6223 elif self._match_text_seq("LOCATION"): 6224 alter_set.set("location", self._parse_field()) 6225 elif self._match_text_seq("ACCESS", "METHOD"): 6226 alter_set.set("access_method", self._parse_field()) 6227 elif self._match_text_seq("TABLESPACE"): 6228 alter_set.set("tablespace", self._parse_field()) 6229 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6230 alter_set.set("file_format", [self._parse_field()]) 6231 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6232 alter_set.set("file_format", self._parse_wrapped_options()) 6233 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6234 alter_set.set("copy_options", self._parse_wrapped_options()) 6235 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6236 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6237 else: 6238 if self._match_text_seq("SERDE"): 6239 alter_set.set("serde", self._parse_field()) 6240 6241 alter_set.set("expressions", [self._parse_properties()]) 6242 6243 return alter_set 6244 6245 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6246 start = self._prev 6247 6248 if not self._match(TokenType.TABLE): 6249 return 
self._parse_as_command(start) 6250 6251 exists = self._parse_exists() 6252 only = self._match_text_seq("ONLY") 6253 this = self._parse_table(schema=True) 6254 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6255 6256 if self._next: 6257 self._advance() 6258 6259 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6260 if parser: 6261 actions = ensure_list(parser(self)) 6262 options = self._parse_csv(self._parse_property) 6263 6264 if not self._curr and actions: 6265 return self.expression( 6266 exp.AlterTable, 6267 this=this, 6268 exists=exists, 6269 actions=actions, 6270 only=only, 6271 options=options, 6272 cluster=cluster, 6273 ) 6274 6275 return self._parse_as_command(start) 6276 6277 def _parse_merge(self) -> exp.Merge: 6278 self._match(TokenType.INTO) 6279 target = self._parse_table() 6280 6281 if target and self._match(TokenType.ALIAS, advance=False): 6282 target.set("alias", self._parse_table_alias()) 6283 6284 self._match(TokenType.USING) 6285 using = self._parse_table() 6286 6287 self._match(TokenType.ON) 6288 on = self._parse_assignment() 6289 6290 return self.expression( 6291 exp.Merge, 6292 this=target, 6293 using=using, 6294 on=on, 6295 expressions=self._parse_when_matched(), 6296 ) 6297 6298 def _parse_when_matched(self) -> t.List[exp.When]: 6299 whens = [] 6300 6301 while self._match(TokenType.WHEN): 6302 matched = not self._match(TokenType.NOT) 6303 self._match_text_seq("MATCHED") 6304 source = ( 6305 False 6306 if self._match_text_seq("BY", "TARGET") 6307 else self._match_text_seq("BY", "SOURCE") 6308 ) 6309 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6310 6311 self._match(TokenType.THEN) 6312 6313 if self._match(TokenType.INSERT): 6314 _this = self._parse_star() 6315 if _this: 6316 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6317 else: 6318 then = self.expression( 6319 exp.Insert, 6320 this=self._parse_value(), 6321 expression=self._match_text_seq("VALUES") and self._parse_value(), 6322 ) 6323 elif self._match(TokenType.UPDATE): 6324 expressions = self._parse_star() 6325 if expressions: 6326 then = self.expression(exp.Update, expressions=expressions) 6327 else: 6328 then = self.expression( 6329 exp.Update, 6330 expressions=self._match(TokenType.SET) 6331 and self._parse_csv(self._parse_equality), 6332 ) 6333 elif self._match(TokenType.DELETE): 6334 then = self.expression(exp.Var, this=self._prev.text) 6335 else: 6336 then = None 6337 6338 whens.append( 6339 self.expression( 6340 exp.When, 6341 matched=matched, 6342 source=source, 6343 condition=condition, 6344 then=then, 6345 ) 6346 ) 6347 return whens 6348 6349 def _parse_show(self) -> t.Optional[exp.Expression]: 6350 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6351 if parser: 6352 return parser(self) 6353 return self._parse_as_command(self._prev) 6354 6355 def _parse_set_item_assignment( 6356 self, kind: t.Optional[str] = None 6357 ) -> t.Optional[exp.Expression]: 6358 index = self._index 6359 6360 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6361 return self._parse_set_transaction(global_=kind == "GLOBAL") 6362 6363 left = self._parse_primary() or self._parse_column() 6364 assignment_delimiter = self._match_texts(("=", "TO")) 6365 6366 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6367 self._retreat(index) 6368 return None 6369 6370 right = self._parse_statement() or self._parse_id_var() 6371 if isinstance(right, 
(exp.Column, exp.Identifier)): 6372 right = exp.var(right.name) 6373 6374 this = self.expression(exp.EQ, this=left, expression=right) 6375 return self.expression(exp.SetItem, this=this, kind=kind) 6376 6377 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6378 self._match_text_seq("TRANSACTION") 6379 characteristics = self._parse_csv( 6380 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6381 ) 6382 return self.expression( 6383 exp.SetItem, 6384 expressions=characteristics, 6385 kind="TRANSACTION", 6386 **{"global": global_}, # type: ignore 6387 ) 6388 6389 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6390 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6391 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6392 6393 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6394 index = self._index 6395 set_ = self.expression( 6396 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6397 ) 6398 6399 if self._curr: 6400 self._retreat(index) 6401 return self._parse_as_command(self._prev) 6402 6403 return set_ 6404 6405 def _parse_var_from_options( 6406 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6407 ) -> t.Optional[exp.Var]: 6408 start = self._curr 6409 if not start: 6410 return None 6411 6412 option = start.text.upper() 6413 continuations = options.get(option) 6414 6415 index = self._index 6416 self._advance() 6417 for keywords in continuations or []: 6418 if isinstance(keywords, str): 6419 keywords = (keywords,) 6420 6421 if self._match_text_seq(*keywords): 6422 option = f"{option} {' '.join(keywords)}" 6423 break 6424 else: 6425 if continuations or continuations is None: 6426 if raise_unmatched: 6427 self.raise_error(f"Unknown option {option}") 6428 6429 self._retreat(index) 6430 return None 6431 6432 return exp.var(option) 6433 6434 def _parse_as_command(self, start: Token) -> exp.Command: 6435 while self._curr: 6436 self._advance() 6437 text = self._find_sql(start, self._prev) 6438 size = len(start.text) 6439 self._warn_unsupported() 6440 return exp.Command(this=text[:size], expression=text[size:]) 6441 6442 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6443 settings = [] 6444 6445 self._match_l_paren() 6446 kind = self._parse_id_var() 6447 6448 if self._match(TokenType.L_PAREN): 6449 while True: 6450 key = self._parse_id_var() 6451 value = self._parse_primary() 6452 6453 if not key and value is None: 6454 break 6455 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6456 self._match(TokenType.R_PAREN) 6457 6458 self._match_r_paren() 6459 6460 return self.expression( 6461 exp.DictProperty, 6462 this=this, 6463 kind=kind.this if kind else None, 6464 settings=settings, 6465 ) 6466 6467 def _parse_dict_range(self, this: str) -> exp.DictRange: 6468 self._match_l_paren() 6469 has_min = self._match_text_seq("MIN") 6470 if has_min: 6471 min = self._parse_var() or self._parse_primary() 6472 self._match_text_seq("MAX") 6473 max = self._parse_var() or self._parse_primary() 6474 else: 6475 max = self._parse_var() or self._parse_primary() 6476 min = exp.Literal.number(0) 6477 self._match_r_paren() 6478 return self.expression(exp.DictRange, this=this, min=min, max=max) 6479 6480 def _parse_comprehension( 6481 self, this: t.Optional[exp.Expression] 6482 ) -> t.Optional[exp.Comprehension]: 6483 index = self._index 6484 expression = self._parse_column() 6485 if not 
self._match(TokenType.IN): 6486 self._retreat(index - 1) 6487 return None 6488 iterator = self._parse_column() 6489 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6490 return self.expression( 6491 exp.Comprehension, 6492 this=this, 6493 expression=expression, 6494 iterator=iterator, 6495 condition=condition, 6496 ) 6497 6498 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6499 if self._match(TokenType.HEREDOC_STRING): 6500 return self.expression(exp.Heredoc, this=self._prev.text) 6501 6502 if not self._match_text_seq("$"): 6503 return None 6504 6505 tags = ["$"] 6506 tag_text = None 6507 6508 if self._is_connected(): 6509 self._advance() 6510 tags.append(self._prev.text.upper()) 6511 else: 6512 self.raise_error("No closing $ found") 6513 6514 if tags[-1] != "$": 6515 if self._is_connected() and self._match_text_seq("$"): 6516 tag_text = tags[-1] 6517 tags.append("$") 6518 else: 6519 self.raise_error("No closing $ found") 6520 6521 heredoc_start = self._curr 6522 6523 while self._curr: 6524 if self._match_text_seq(*tags, advance=False): 6525 this = self._find_sql(heredoc_start, self._prev) 6526 self._advance(len(tags)) 6527 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6528 6529 self._advance() 6530 6531 self.raise_error(f"No closing {''.join(tags)} found") 6532 return None 6533 6534 def _find_parser( 6535 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6536 ) -> t.Optional[t.Callable]: 6537 if not self._curr: 6538 return None 6539 6540 index = self._index 6541 this = [] 6542 while True: 6543 # The current token might be multiple words 6544 curr = self._curr.text.upper() 6545 key = curr.split(" ") 6546 this.append(curr) 6547 6548 self._advance() 6549 result, trie = in_trie(trie, key) 6550 if result == TrieResult.FAILED: 6551 break 6552 6553 if result == TrieResult.EXISTS: 6554 subparser = parsers[" ".join(this)] 6555 return subparser 6556 6557 self._retreat(index) 6558 return None 6559 6560 def _match(self, token_type, advance=True, expression=None): 6561 if not self._curr: 6562 return None 6563 6564 if self._curr.token_type == token_type: 6565 if advance: 6566 self._advance() 6567 self._add_comments(expression) 6568 return True 6569 6570 return None 6571 6572 def _match_set(self, types, advance=True): 6573 if not self._curr: 6574 return None 6575 6576 if self._curr.token_type in types: 6577 if advance: 6578 self._advance() 6579 return True 6580 6581 return None 6582 6583 def _match_pair(self, token_type_a, token_type_b, advance=True): 6584 if not self._curr or not self._next: 6585 return None 6586 6587 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6588 if advance: 6589 self._advance(2) 6590 return True 6591 6592 return None 6593 6594 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6595 if not self._match(TokenType.L_PAREN, expression=expression): 6596 self.raise_error("Expecting (") 6597 6598 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6599 if not self._match(TokenType.R_PAREN, expression=expression): 6600 self.raise_error("Expecting )") 6601 6602 def _match_texts(self, texts, advance=True): 6603 if self._curr and self._curr.text.upper() in texts: 6604 if advance: 6605 self._advance() 6606 return True 6607 return None 6608 6609 def _match_text_seq(self, *texts, advance=True): 6610 index = self._index 6611 for text in texts: 6612 if self._curr and self._curr.text.upper() == text: 6613 self._advance() 6614 else: 6615 
self._retreat(index) 6616 return None 6617 6618 if not advance: 6619 self._retreat(index) 6620 6621 return True 6622 6623 def _replace_lambda( 6624 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6625 ) -> t.Optional[exp.Expression]: 6626 if not node: 6627 return node 6628 6629 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6630 6631 for column in node.find_all(exp.Column): 6632 typ = lambda_types.get(column.parts[0].name) 6633 if typ is not None: 6634 dot_or_id = column.to_dot() if column.table else column.this 6635 6636 if typ: 6637 dot_or_id = self.expression( 6638 exp.Cast, 6639 this=dot_or_id, 6640 to=typ, 6641 ) 6642 6643 parent = column.parent 6644 6645 while isinstance(parent, exp.Dot): 6646 if not isinstance(parent.parent, exp.Dot): 6647 parent.replace(dot_or_id) 6648 break 6649 parent = parent.parent 6650 else: 6651 if column is node: 6652 node = dot_or_id 6653 else: 6654 column.replace(dot_or_id) 6655 return node 6656 6657 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6658 start = self._prev 6659 6660 # Not to be confused with TRUNCATE(number, decimals) function call 6661 if self._match(TokenType.L_PAREN): 6662 self._retreat(self._index - 2) 6663 return self._parse_function() 6664 6665 # Clickhouse supports TRUNCATE DATABASE as well 6666 is_database = self._match(TokenType.DATABASE) 6667 6668 self._match(TokenType.TABLE) 6669 6670 exists = self._parse_exists(not_=False) 6671 6672 expressions = self._parse_csv( 6673 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6674 ) 6675 6676 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6677 6678 if self._match_text_seq("RESTART", "IDENTITY"): 6679 identity = "RESTART" 6680 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6681 identity = "CONTINUE" 6682 else: 6683 identity = None 6684 6685 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6686 option = self._prev.text 6687 else: 6688 option = None 6689 6690 partition = self._parse_partition() 6691 6692 # Fallback case 6693 if self._curr: 6694 return self._parse_as_command(start) 6695 6696 return self.expression( 6697 exp.TruncateTable, 6698 expressions=expressions, 6699 is_database=is_database, 6700 exists=exists, 6701 cluster=cluster, 6702 identity=identity, 6703 option=option, 6704 partition=partition, 6705 ) 6706 6707 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6708 this = self._parse_ordered(self._parse_opclass) 6709 6710 if not self._match(TokenType.WITH): 6711 return this 6712 6713 op = self._parse_var(any_token=True) 6714 6715 return self.expression(exp.WithOperator, this=this, op=op) 6716 6717 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6718 self._match(TokenType.EQ) 6719 self._match(TokenType.L_PAREN) 6720 6721 opts: t.List[t.Optional[exp.Expression]] = [] 6722 while self._curr and not self._match(TokenType.R_PAREN): 6723 if self._match_text_seq("FORMAT_NAME", "="): 6724 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6725 # so we parse it separately to use _parse_field() 6726 prop = self.expression( 6727 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6728 ) 6729 opts.append(prop) 6730 else: 6731 opts.append(self._parse_property()) 6732 6733 self._match(TokenType.COMMA) 6734 6735 return opts 6736 6737 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6738 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6739 6740 options = [] 6741 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6742 option = self._parse_var(any_token=True) 6743 prev = self._prev.text.upper() 6744 6745 # Different dialects might separate options and values by white space, "=" and "AS" 6746 self._match(TokenType.EQ) 6747 self._match(TokenType.ALIAS) 6748 6749 param = self.expression(exp.CopyParameter, this=option) 6750 6751 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6752 TokenType.L_PAREN, advance=False 6753 ): 6754 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6755 param.set("expressions", self._parse_wrapped_options()) 6756 elif prev == "FILE_FORMAT": 6757 # T-SQL's external file format case 6758 param.set("expression", self._parse_field()) 6759 else: 6760 param.set("expression", self._parse_unquoted_field()) 6761 6762 options.append(param) 6763 self._match(sep) 6764 6765 return options 6766 6767 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6768 expr = self.expression(exp.Credentials) 6769 6770 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6771 expr.set("storage", self._parse_field()) 6772 if self._match_text_seq("CREDENTIALS"): 6773 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6774 creds = ( 6775 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6776 ) 6777 expr.set("credentials", creds) 6778 if self._match_text_seq("ENCRYPTION"): 6779 expr.set("encryption", self._parse_wrapped_options()) 6780 if self._match_text_seq("IAM_ROLE"): 6781 expr.set("iam_role", self._parse_field()) 6782 if self._match_text_seq("REGION"): 6783 expr.set("region", self._parse_field()) 6784 6785 return expr 6786 6787 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6788 return self._parse_field() 6789 6790 def _parse_copy(self) -> exp.Copy | exp.Command: 6791 start = self._prev 6792 6793 self._match(TokenType.INTO) 6794 6795 this = ( 6796 self._parse_select(nested=True, parse_subquery_alias=False) 6797 if self._match(TokenType.L_PAREN, advance=False) 6798 else self._parse_table(schema=True) 6799 ) 6800 6801 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6802 6803 files = self._parse_csv(self._parse_file_location) 6804 credentials = self._parse_credentials() 6805 6806 self._match_text_seq("WITH") 6807 6808 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6809 6810 # Fallback case 6811 if self._curr: 6812 return self._parse_as_command(start) 6813 6814 return self.expression( 6815 exp.Copy, 6816 this=this, 6817 kind=kind, 6818 credentials=credentials, 6819 files=files, 6820 params=params, 6821 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
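As a quick illustration (a sketch, not part of the source), the class can also be driven directly; most callers instead go through sqlglot.parse or sqlglot.parse_one, which construct a Parser internally. The query string here is illustrative.

import sqlglot
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to five errors before raising, instead of failing on the first one.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
tokens = sqlglot.tokenize("SELECT a FROM t")
expressions = parser.parse(tokens, sql="SELECT a FROM t")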
1224 def __init__( 1225 self, 1226 error_level: t.Optional[ErrorLevel] = None, 1227 error_message_context: int = 100, 1228 max_errors: int = 3, 1229 dialect: DialectType = None, 1230 ): 1231 from sqlglot.dialects import Dialect 1232 1233 self.error_level = error_level or ErrorLevel.IMMEDIATE 1234 self.error_message_context = error_message_context 1235 self.max_errors = max_errors 1236 self.dialect = Dialect.get_or_raise(dialect) 1237 self.reset()
1249 def parse( 1250 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1251 ) -> t.List[t.Optional[exp.Expression]]: 1252 """ 1253 Parses a list of tokens and returns a list of syntax trees, one tree 1254 per parsed SQL statement. 1255 1256 Args: 1257 raw_tokens: The list of tokens. 1258 sql: The original SQL string, used to produce helpful debug messages. 1259 1260 Returns: 1261 The list of the produced syntax trees. 1262 """ 1263 return self._parse( 1264 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1265 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
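A hedged usage sketch: since one tree is produced per statement, a two-statement string yields a two-element list (default dialect assumed).

import sqlglot
from sqlglot.parser import Parser

sql = "SELECT a FROM t; SELECT b FROM u"
trees = Parser().parse(sqlglot.tokenize(sql), sql=sql)
assert len(trees) == 2  # one exp.Select per statement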
1267 def parse_into( 1268 self, 1269 expression_types: exp.IntoType, 1270 raw_tokens: t.List[Token], 1271 sql: t.Optional[str] = None, 1272 ) -> t.List[t.Optional[exp.Expression]]: 1273 """ 1274 Parses a list of tokens into a given Expression type. If a collection of Expression 1275 types is given instead, this method will try to parse the token list into each one 1276 of them, stopping at the first for which the parsing succeeds. 1277 1278 Args: 1279 expression_types: The expression type(s) to try and parse the token list into. 1280 raw_tokens: The list of tokens. 1281 sql: The original SQL string, used to produce helpful debug messages. 1282 1283 Returns: 1284 The target Expression. 1285 """ 1286 errors = [] 1287 for expression_type in ensure_list(expression_types): 1288 parser = self.EXPRESSION_PARSERS.get(expression_type) 1289 if not parser: 1290 raise TypeError(f"No parser registered for {expression_type}") 1291 1292 try: 1293 return self._parse(parser, raw_tokens, sql) 1294 except ParseError as e: 1295 e.errors[0]["into_expression"] = expression_type 1296 errors.append(e) 1297 1298 raise ParseError( 1299 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1300 errors=merge_errors(errors), 1301 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
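For example (a sketch; exp.Select is one of the types registered in EXPRESSION_PARSERS, so the token list is parsed directly as a SELECT):

import sqlglot
from sqlglot import exp
from sqlglot.parser import Parser

tokens = sqlglot.tokenize("SELECT a FROM t")
select = Parser().parse_into(exp.Select, tokens, sql="SELECT a FROM t")[0]
assert isinstance(select, exp.Select)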
1341 def check_errors(self) -> None: 1342 """Logs or raises any found errors, depending on the chosen error level setting.""" 1343 if self.error_level == ErrorLevel.WARN: 1344 for error in self.errors: 1345 logger.error(str(error)) 1346 elif self.error_level == ErrorLevel.RAISE and self.errors: 1347 raise ParseError( 1348 concat_messages(self.errors, self.max_errors), 1349 errors=merge_errors(self.errors), 1350 )
Logs or raises any found errors, depending on the chosen error level setting.
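Sketch of the error-collecting mode (the invalid query is illustrative): with ErrorLevel.WARN, raise_error records errors rather than raising, and check_errors then logs each one.

import logging
import sqlglot
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

logging.basicConfig()
parser = Parser(error_level=ErrorLevel.WARN)
# parse() itself ends by invoking check_errors(), so the errors are logged here
# rather than raised, and remain inspectable afterwards.
parser.parse(sqlglot.tokenize("SELECT 1 +"), sql="SELECT 1 +")
print(len(parser.errors))  # the recorded ParseError instances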
1352 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1353 """ 1354 Appends an error to the list of recorded errors or raises it, depending on the chosen 1355 error level setting. 1356 """ 1357 token = token or self._curr or self._prev or Token.string("") 1358 start = token.start 1359 end = token.end + 1 1360 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1361 highlight = self.sql[start:end] 1362 end_context = self.sql[end : end + self.error_message_context] 1363 1364 error = ParseError.new( 1365 f"{message}. Line {token.line}, Col: {token.col}.\n" 1366 f"  {start_context}\033[4m{highlight}\033[0m{end_context}", 1367 description=message, 1368 line=token.line, 1369 col=token.col, 1370 start_context=start_context, 1371 highlight=highlight, 1372 end_context=end_context, 1373 ) 1374 1375 if self.error_level == ErrorLevel.IMMEDIATE: 1376 raise error 1377 1378 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
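Under the default ErrorLevel.IMMEDIATE the error is raised at once; a sketch of inspecting the structured fields attached by ParseError.new (the broken query is illustrative):

import sqlglot
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    Parser().parse(sqlglot.tokenize("SELECT 1 +"), sql="SELECT 1 +")
except ParseError as e:
    first = e.errors[0]  # mirrors the kwargs passed to ParseError.new above
    print(first["line"], first["col"], first["highlight"])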
1380 def expression( 1381 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1382 ) -> E: 1383 """ 1384 Creates a new, validated Expression. 1385 1386 Args: 1387 exp_class: The expression class to instantiate. 1388 comments: An optional list of comments to attach to the expression. 1389 kwargs: The arguments to set for the expression along with their respective values. 1390 1391 Returns: 1392 The target expression. 1393 """ 1394 instance = exp_class(**kwargs) 1395 instance.add_comments(comments) if comments else self._add_comments(instance) 1396 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
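A sketch of calling it by hand outside a parse (the node and names are illustrative):

from sqlglot import exp
from sqlglot.parser import Parser

# Builds Alias(this=Column(a), alias=Identifier(b)) and validates it in one step.
node = Parser().expression(exp.Alias, this=exp.column("a"), alias=exp.to_identifier("b"))
print(node.sql())  # a AS b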
1403 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1404 """ 1405 Validates an Expression, making sure that all its mandatory arguments are set. 1406 1407 Args: 1408 expression: The expression to validate. 1409 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1410 1411 Returns: 1412 The validated expression. 1413 """ 1414 if self.error_level != ErrorLevel.IGNORE: 1415 for error_message in expression.error_messages(args): 1416 self.raise_error(error_message) 1417 1418 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
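Sketch: a node missing a mandatory argument fails validation, and with the default ErrorLevel.IMMEDIATE the failure surfaces through raise_error (the half-built Cast is deliberate):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    # exp.Cast requires a target type (`to`), omitted here on purpose.
    Parser().validate_expression(exp.Cast(this=exp.column("a")))
except ParseError as e:
    print(e)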