sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
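
    # Example (illustrative sketch, not part of the API surface): the parser is usually driven
    # through a Dialect, but it can also be used directly with this package's Tokenizer:
    #
    #     from sqlglot.tokens import Tokenizer
    #     from sqlglot.parser import Parser
    #
    #     tokens = Tokenizer().tokenize("SELECT a FROM tbl")
    #     expressions = Parser().parse(tokens)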

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
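
    # Example (sketch): LAMBDAS drives lambda syntax inside function arguments, e.g. in
    # `FILTER(arr, x -> x > 0)` the `x -> x > 0` part should build an exp.Lambda whose
    # `expressions` hold the parameters and whose `this` is the body, with parameter names
    # resolved by _replace_lambda.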

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
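
    # Example (sketch): EXPRESSION_PARSERS is the dispatch table behind parse_into (defined
    # below), which allows parsing straight into a fragment type; the result is a one-element
    # list of trees:
    #
    #     from sqlglot import exp
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     trees = Parser().parse_into(exp.Condition, Tokenizer().tokenize("a > 1 AND b < 2"))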

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }
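
    # Example (sketch): per PLACEHOLDER_PARSERS above, a bare `?` becomes an anonymous
    # exp.Placeholder, while a named placeholder such as `:limit` in
    # `SELECT * FROM t LIMIT :limit` parses into exp.Placeholder(this="limit").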

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
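
    # Example (sketch): FUNCTION_PARSERS covers functions whose arguments are not a plain
    # comma-separated list and so need dedicated parsing, e.g. the keyword-separated forms
    #
    #     EXTRACT(YEAR FROM created_at)
    #     TRIM(BOTH ' ' FROM name)
    #     CAST(x AS INT)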

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False
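
    # Example (illustrative sketch): dialects customize parsing by subclassing Parser and
    # overriding these class-level flags; the subclass below is hypothetical.
    #
    #     class MyParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True  # single-argument LOG(x) parses as LN(x)
    #         STRICT_CAST = False        # '::' casts build exp.TryCast instead of exp.Cast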

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
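
    # Example (sketch): statements are split on semicolons, yielding one tree per statement.
    # Tokenizer is already imported at the top of this module.
    #
    #     >>> trees = Parser().parse(Tokenizer().tokenize("SELECT 1; SELECT 2"))
    #     >>> len(trees)
    #     2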

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
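
    # Example (sketch): how an error surfaces depends on error_level. ErrorLevel.IMMEDIATE
    # (the default) raises from raise_error as soon as a problem is found, ErrorLevel.RAISE
    # collects errors and raises them together from check_errors, and ErrorLevel.WARN only
    # logs them, e.g. for a malformed input:
    #
    #     parser = Parser(error_level=ErrorLevel.WARN)
    #     parser.parse(Tokenizer().tokenize("SELECT 1 +"))  # logs the error instead of raising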

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
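
    # Example (sketch): _parse_statement is the top-level dispatcher. "DROP TABLE t" starts with
    # TokenType.DROP, so STATEMENT_PARSERS routes it to _parse_drop and yields an exp.Drop; tokens
    # in Tokenizer.COMMANDS fall back to a generic exp.Command; anything else is parsed as an
    # expression or a SELECT, followed by any query modifiers.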

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev

        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")
self._match_text_seq("END") 1588 1589 if return_: 1590 expression = self.expression(exp.Return, this=expression) 1591 elif create_token.token_type == TokenType.INDEX: 1592 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1593 if not self._match(TokenType.ON): 1594 index = self._parse_id_var() 1595 anonymous = False 1596 else: 1597 index = None 1598 anonymous = True 1599 1600 this = self._parse_index(index=index, anonymous=anonymous) 1601 elif create_token.token_type in self.DB_CREATABLES: 1602 table_parts = self._parse_table_parts( 1603 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1604 ) 1605 1606 # exp.Properties.Location.POST_NAME 1607 self._match(TokenType.COMMA) 1608 extend_props(self._parse_properties(before=True)) 1609 1610 this = self._parse_schema(this=table_parts) 1611 1612 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1613 extend_props(self._parse_properties()) 1614 1615 self._match(TokenType.ALIAS) 1616 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1617 # exp.Properties.Location.POST_ALIAS 1618 extend_props(self._parse_properties()) 1619 1620 if create_token.token_type == TokenType.SEQUENCE: 1621 expression = self._parse_types() 1622 extend_props(self._parse_properties()) 1623 else: 1624 expression = self._parse_ddl_select() 1625 1626 if create_token.token_type == TokenType.TABLE: 1627 # exp.Properties.Location.POST_EXPRESSION 1628 extend_props(self._parse_properties()) 1629 1630 indexes = [] 1631 while True: 1632 index = self._parse_index() 1633 1634 # exp.Properties.Location.POST_INDEX 1635 extend_props(self._parse_properties()) 1636 1637 if not index: 1638 break 1639 else: 1640 self._match(TokenType.COMMA) 1641 indexes.append(index) 1642 elif create_token.token_type == TokenType.VIEW: 1643 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1644 no_schema_binding = True 1645 1646 shallow = self._match_text_seq("SHALLOW") 1647 1648 if self._match_texts(self.CLONE_KEYWORDS): 1649 copy = self._prev.text.lower() == "copy" 1650 clone = self.expression( 1651 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1652 ) 1653 1654 if self._curr: 1655 return self._parse_as_command(start) 1656 1657 return self.expression( 1658 exp.Create, 1659 comments=comments, 1660 this=this, 1661 kind=create_token.text.upper(), 1662 replace=replace, 1663 unique=unique, 1664 expression=expression, 1665 exists=exists, 1666 properties=properties, 1667 indexes=indexes, 1668 no_schema_binding=no_schema_binding, 1669 begin=begin, 1670 end=end, 1671 clone=clone, 1672 ) 1673 1674 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1675 seq = exp.SequenceProperties() 1676 1677 options = [] 1678 index = self._index 1679 1680 while self._curr: 1681 if self._match_text_seq("INCREMENT"): 1682 self._match_text_seq("BY") 1683 self._match_text_seq("=") 1684 seq.set("increment", self._parse_term()) 1685 elif self._match_text_seq("MINVALUE"): 1686 seq.set("minvalue", self._parse_term()) 1687 elif self._match_text_seq("MAXVALUE"): 1688 seq.set("maxvalue", self._parse_term()) 1689 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1690 self._match_text_seq("=") 1691 seq.set("start", self._parse_term()) 1692 elif self._match_text_seq("CACHE"): 1693 # T-SQL allows empty CACHE which is initialized dynamically 1694 seq.set("cache", self._parse_number() or True) 1695 elif self._match_text_seq("OWNED", "BY"): 1696 # "OWNED BY NONE" is the default 1697 seq.set("owned", None if 
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self):
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)
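
    # Sketch of _parse_property's fallback path: an unrecognized `key = value` pair
    # becomes a generic exp.Property (assumes only the public parse_one API):
    #
    #   >>> import sqlglot
    #   >>> ddl = sqlglot.parse_one(
    #   ...     "CREATE TABLE t (a INT) WITH (fillfactor = 70)", read="postgres"
    #   ... )
    #   >>> ddl.find(sqlglot.exp.Property) is not None
    #   True
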
    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
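
    # _parse_definer above consumes MySQL's DEFINER = user@host clause; a sketch,
    # assuming MySQL dialect support via the public API:
    #
    #   >>> import sqlglot
    #   >>> view = sqlglot.parse_one(
    #   ...     "CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql"
    #   ... )
    #   >>> view.find(sqlglot.exp.DefinerProperty).this
    #   'admin@localhost'
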
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
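
    # The Postgres partition-of clauses above can be exercised end to end; a minimal
    # sketch (Postgres dialect support assumed):
    #
    #   >>> import sqlglot
    #   >>> ddl = sqlglot.parse_one(
    #   ...     "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)", read="postgres"
    #   ... )
    #   >>> ddl.find(sqlglot.exp.PartitionBoundSpec) is not None
    #   True
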
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
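
    # _parse_on_conflict covers both Postgres ON CONFLICT and MySQL ON DUPLICATE KEY;
    # a minimal sketch via the public API:
    #
    #   >>> import sqlglot
    #   >>> ins = sqlglot.parse_one(
    #   ...     "INSERT INTO t (id) VALUES (1) ON CONFLICT (id) DO NOTHING", read="postgres"
    #   ... )
    #   >>> isinstance(ins.args["conflict"], sqlglot.exp.OnConflict)
    #   True
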
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
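
    # The multiple-table branch of _parse_delete keeps the deleted-from names in
    # the "tables" arg; a sketch:
    #
    #   >>> import sqlglot
    #   >>> delete = sqlglot.parse_one("DELETE a FROM a JOIN b ON a.id = b.id", read="mysql")
    #   >>> [table.name for table in delete.args["tables"]]
    #   ['a']
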
    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
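
    # The leading-FROM path above mirrors DuckDB's FROM-first syntax; a sketch:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("FROM tbl", read="duckdb").sql()
    #   'SELECT * FROM tbl'
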
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
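
    # Materialization hints on CTEs land in the "materialized" arg; a sketch:
    #
    #   >>> import sqlglot
    #   >>> cte = sqlglot.parse_one(
    #   ...     "WITH c AS MATERIALIZED (SELECT 1) SELECT * FROM c", read="postgres"
    #   ... ).find(sqlglot.exp.CTE)
    #   >>> cte.args["materialized"]
    #   True
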
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
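
    # _parse_hint above consumes optimizer hint comments (/*+ ... */); a sketch:
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("SELECT /*+ BROADCAST(y) */ x FROM y", read="spark")
    #   >>> q.args["hint"] is not None
    #   True
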
    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
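
    # Index modifiers and trailing parameters end up on exp.Create / exp.IndexParameters;
    # a sketch with a Postgres partial index:
    #
    #   >>> import sqlglot
    #   >>> ddl = sqlglot.parse_one("CREATE UNIQUE INDEX i ON t (a) WHERE a > 0", read="postgres")
    #   >>> ddl.args["unique"], ddl.find(sqlglot.exp.IndexParameters).args["where"] is not None
    #   (True, True)
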
    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)
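
    # TABLESAMPLE sizes, percentages and seeds all land on exp.TableSample; a sketch:
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)", read="tsql")
    #   >>> q.find(sqlglot.exp.TableSample).args["percent"] is not None
    #   True
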
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )
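
    # PIVOT/UNPIVOT clauses parse into exp.Pivot nodes attached to the table; a sketch:
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one(
    #   ...     "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))", read="snowflake"
    #   ... )
    #   >>> q.find(sqlglot.exp.Pivot) is not None
    #   True
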
this=self._parse_conjunction() 3508 ) 3509 3510 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3511 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3512 return None 3513 3514 elements: t.Dict[str, t.Any] = defaultdict(list) 3515 3516 if self._match(TokenType.ALL): 3517 elements["all"] = True 3518 elif self._match(TokenType.DISTINCT): 3519 elements["all"] = False 3520 3521 while True: 3522 expressions = self._parse_csv(self._parse_conjunction) 3523 if expressions: 3524 elements["expressions"].extend(expressions) 3525 3526 grouping_sets = self._parse_grouping_sets() 3527 if grouping_sets: 3528 elements["grouping_sets"].extend(grouping_sets) 3529 3530 rollup = None 3531 cube = None 3532 totals = None 3533 3534 index = self._index 3535 with_ = self._match(TokenType.WITH) 3536 if self._match(TokenType.ROLLUP): 3537 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3538 elements["rollup"].extend(ensure_list(rollup)) 3539 3540 if self._match(TokenType.CUBE): 3541 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3542 elements["cube"].extend(ensure_list(cube)) 3543 3544 if self._match_text_seq("TOTALS"): 3545 totals = True 3546 elements["totals"] = True # type: ignore 3547 3548 if not (grouping_sets or rollup or cube or totals): 3549 if with_: 3550 self._retreat(index) 3551 break 3552 3553 return self.expression(exp.Group, **elements) # type: ignore 3554 3555 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3556 if not self._match(TokenType.GROUPING_SETS): 3557 return None 3558 3559 return self._parse_wrapped_csv(self._parse_grouping_set) 3560 3561 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3562 if self._match(TokenType.L_PAREN): 3563 grouping_set = self._parse_csv(self._parse_column) 3564 self._match_r_paren() 3565 return self.expression(exp.Tuple, expressions=grouping_set) 3566 3567 return self._parse_column() 3568 3569 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3570 if not skip_having_token and not self._match(TokenType.HAVING): 3571 return None 3572 return self.expression(exp.Having, this=self._parse_conjunction()) 3573 3574 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3575 if not self._match(TokenType.QUALIFY): 3576 return None 3577 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3578 3579 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3580 if skip_start_token: 3581 start = None 3582 elif self._match(TokenType.START_WITH): 3583 start = self._parse_conjunction() 3584 else: 3585 return None 3586 3587 self._match(TokenType.CONNECT_BY) 3588 nocycle = self._match_text_seq("NOCYCLE") 3589 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3590 exp.Prior, this=self._parse_bitwise() 3591 ) 3592 connect = self._parse_conjunction() 3593 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3594 3595 if not start and self._match(TokenType.START_WITH): 3596 start = self._parse_conjunction() 3597 3598 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3599 3600 def _parse_name_as_expression(self) -> exp.Alias: 3601 return self.expression( 3602 exp.Alias, 3603 alias=self._parse_id_var(any_token=True), 3604 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3605 ) 3606 3607 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3608 if self._match_text_seq("INTERPOLATE"): 3609 return 
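# Sketch (annotation, not parser source): _parse_group collects plain keys and
# GROUPING SETS / ROLLUP / CUBE buckets into separate args of one exp.Group.
from sqlglot import exp, parse_one

group = parse_one("SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a), (a, b))").find(exp.Group)
print(group.args.get("grouping_sets"))  # a list of exp.Tuple nodes, one per grouping set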
self._parse_wrapped_csv(self._parse_name_as_expression) 3610 return None 3611 3612 def _parse_order( 3613 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3614 ) -> t.Optional[exp.Expression]: 3615 siblings = None 3616 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3617 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3618 return this 3619 3620 siblings = True 3621 3622 return self.expression( 3623 exp.Order, 3624 this=this, 3625 expressions=self._parse_csv(self._parse_ordered), 3626 interpolate=self._parse_interpolate(), 3627 siblings=siblings, 3628 ) 3629 3630 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3631 if not self._match(token): 3632 return None 3633 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3634 3635 def _parse_ordered( 3636 self, parse_method: t.Optional[t.Callable] = None 3637 ) -> t.Optional[exp.Ordered]: 3638 this = parse_method() if parse_method else self._parse_conjunction() 3639 if not this: 3640 return None 3641 3642 asc = self._match(TokenType.ASC) 3643 desc = self._match(TokenType.DESC) or (asc and False) 3644 3645 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3646 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3647 3648 nulls_first = is_nulls_first or False 3649 explicitly_null_ordered = is_nulls_first or is_nulls_last 3650 3651 if ( 3652 not explicitly_null_ordered 3653 and ( 3654 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3655 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3656 ) 3657 and self.dialect.NULL_ORDERING != "nulls_are_last" 3658 ): 3659 nulls_first = True 3660 3661 if self._match_text_seq("WITH", "FILL"): 3662 with_fill = self.expression( 3663 exp.WithFill, 3664 **{ # type: ignore 3665 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3666 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3667 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3668 }, 3669 ) 3670 else: 3671 with_fill = None 3672 3673 return self.expression( 3674 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3675 ) 3676 3677 def _parse_limit( 3678 self, 3679 this: t.Optional[exp.Expression] = None, 3680 top: bool = False, 3681 skip_limit_token: bool = False, 3682 ) -> t.Optional[exp.Expression]: 3683 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3684 comments = self._prev_comments 3685 if top: 3686 limit_paren = self._match(TokenType.L_PAREN) 3687 expression = self._parse_term() if limit_paren else self._parse_number() 3688 3689 if limit_paren: 3690 self._match_r_paren() 3691 else: 3692 expression = self._parse_term() 3693 3694 if self._match(TokenType.COMMA): 3695 offset = expression 3696 expression = self._parse_term() 3697 else: 3698 offset = None 3699 3700 limit_exp = self.expression( 3701 exp.Limit, 3702 this=this, 3703 expression=expression, 3704 offset=offset, 3705 comments=comments, 3706 expressions=self._parse_limit_by(), 3707 ) 3708 3709 return limit_exp 3710 3711 if self._match(TokenType.FETCH): 3712 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3713 direction = self._prev.text.upper() if direction else "FIRST" 3714 3715 count = self._parse_field(tokens=self.FETCH_TOKENS) 3716 percent = self._match(TokenType.PERCENT) 3717 3718 self._match_set((TokenType.ROW, TokenType.ROWS)) 3719 3720 only = self._match_text_seq("ONLY") 3721 with_ties = self._match_text_seq("WITH", "TIES") 3722 3723 if 
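# Sketch (annotation, not parser source): _parse_ordered folds an explicit
# NULLS FIRST/LAST (or the dialect's NULL_ORDERING default) into nulls_first.
from sqlglot import exp, parse_one

ordered = parse_one("SELECT a FROM t ORDER BY a DESC NULLS LAST").find(exp.Ordered)
print(ordered.args.get("desc"), ordered.args.get("nulls_first"))  # True False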
only and with_ties: 3724 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3725 3726 return self.expression( 3727 exp.Fetch, 3728 direction=direction, 3729 count=count, 3730 percent=percent, 3731 with_ties=with_ties, 3732 ) 3733 3734 return this 3735 3736 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3737 if not self._match(TokenType.OFFSET): 3738 return this 3739 3740 count = self._parse_term() 3741 self._match_set((TokenType.ROW, TokenType.ROWS)) 3742 3743 return self.expression( 3744 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3745 ) 3746 3747 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3748 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3749 3750 def _parse_locks(self) -> t.List[exp.Lock]: 3751 locks = [] 3752 while True: 3753 if self._match_text_seq("FOR", "UPDATE"): 3754 update = True 3755 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3756 "LOCK", "IN", "SHARE", "MODE" 3757 ): 3758 update = False 3759 else: 3760 break 3761 3762 expressions = None 3763 if self._match_text_seq("OF"): 3764 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3765 3766 wait: t.Optional[bool | exp.Expression] = None 3767 if self._match_text_seq("NOWAIT"): 3768 wait = True 3769 elif self._match_text_seq("WAIT"): 3770 wait = self._parse_primary() 3771 elif self._match_text_seq("SKIP", "LOCKED"): 3772 wait = False 3773 3774 locks.append( 3775 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3776 ) 3777 3778 return locks 3779 3780 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3781 while this and self._match_set(self.SET_OPERATIONS): 3782 token_type = self._prev.token_type 3783 3784 if token_type == TokenType.UNION: 3785 operation = exp.Union 3786 elif token_type == TokenType.EXCEPT: 3787 operation = exp.Except 3788 else: 3789 operation = exp.Intersect 3790 3791 comments = self._prev.comments 3792 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3793 by_name = self._match_text_seq("BY", "NAME") 3794 expression = self._parse_select(nested=True, parse_set_operation=False) 3795 3796 this = self.expression( 3797 operation, 3798 comments=comments, 3799 this=this, 3800 distinct=distinct, 3801 by_name=by_name, 3802 expression=expression, 3803 ) 3804 3805 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3806 expression = this.expression 3807 3808 if expression: 3809 for arg in self.UNION_MODIFIERS: 3810 expr = expression.args.get(arg) 3811 if expr: 3812 this.set(arg, expr.pop()) 3813 3814 return this 3815 3816 def _parse_expression(self) -> t.Optional[exp.Expression]: 3817 return self._parse_alias(self._parse_conjunction()) 3818 3819 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3820 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3821 3822 def _parse_equality(self) -> t.Optional[exp.Expression]: 3823 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3824 3825 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3826 return self._parse_tokens(self._parse_range, self.COMPARISON) 3827 3828 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3829 this = this or self._parse_bitwise() 3830 negate = self._match(TokenType.NOT) 3831 3832 if self._match_set(self.RANGE_PARSERS): 3833 expression = 
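# Sketch (annotation, not parser source): the FETCH branch of _parse_limit
# builds an exp.Fetch rather than an exp.Limit.
from sqlglot import exp, parse_one

fetch = parse_one("SELECT a FROM t FETCH FIRST 5 ROWS ONLY").find(exp.Fetch)
print(fetch.args.get("direction"), fetch.args.get("count"))  # FIRST and a number Literal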
self.RANGE_PARSERS[self._prev.token_type](self, this) 3834 if not expression: 3835 return this 3836 3837 this = expression 3838 elif self._match(TokenType.ISNULL): 3839 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3840 3841 # Postgres supports ISNULL and NOTNULL for conditions. 3842 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3843 if self._match(TokenType.NOTNULL): 3844 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3845 this = self.expression(exp.Not, this=this) 3846 3847 if negate: 3848 this = self.expression(exp.Not, this=this) 3849 3850 if self._match(TokenType.IS): 3851 this = self._parse_is(this) 3852 3853 return this 3854 3855 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3856 index = self._index - 1 3857 negate = self._match(TokenType.NOT) 3858 3859 if self._match_text_seq("DISTINCT", "FROM"): 3860 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3861 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3862 3863 expression = self._parse_null() or self._parse_boolean() 3864 if not expression: 3865 self._retreat(index) 3866 return None 3867 3868 this = self.expression(exp.Is, this=this, expression=expression) 3869 return self.expression(exp.Not, this=this) if negate else this 3870 3871 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3872 unnest = self._parse_unnest(with_alias=False) 3873 if unnest: 3874 this = self.expression(exp.In, this=this, unnest=unnest) 3875 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3876 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3877 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3878 3879 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3880 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3881 else: 3882 this = self.expression(exp.In, this=this, expressions=expressions) 3883 3884 if matched_l_paren: 3885 self._match_r_paren(this) 3886 elif not self._match(TokenType.R_BRACKET, expression=this): 3887 self.raise_error("Expecting ]") 3888 else: 3889 this = self.expression(exp.In, this=this, field=self._parse_field()) 3890 3891 return this 3892 3893 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3894 low = self._parse_bitwise() 3895 self._match(TokenType.AND) 3896 high = self._parse_bitwise() 3897 return self.expression(exp.Between, this=this, low=low, high=high) 3898 3899 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3900 if not self._match(TokenType.ESCAPE): 3901 return this 3902 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3903 3904 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3905 index = self._index 3906 3907 if not self._match(TokenType.INTERVAL) and match_interval: 3908 return None 3909 3910 if self._match(TokenType.STRING, advance=False): 3911 this = self._parse_primary() 3912 else: 3913 this = self._parse_term() 3914 3915 if not this or ( 3916 isinstance(this, exp.Column) 3917 and not this.table 3918 and not this.this.quoted 3919 and this.name.upper() == "IS" 3920 ): 3921 self._retreat(index) 3922 return None 3923 3924 unit = self._parse_function() or ( 3925 not self._match(TokenType.ALIAS, advance=False) 3926 and self._parse_var(any_token=True, upper=True) 3927 ) 3928 3929 # Most dialects support, e.g., the form INTERVAL 
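# Sketch (annotation, not parser source): _parse_is maps IS [NOT] DISTINCT FROM
# onto the null-safe comparators; note the inversion (DISTINCT means "not equal").
from sqlglot import exp, parse_one

print(parse_one("SELECT a IS DISTINCT FROM b").find(exp.NullSafeNEQ) is not None)     # True
print(parse_one("SELECT a IS NOT DISTINCT FROM b").find(exp.NullSafeEQ) is not None)  # True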
'5' day, thus we try to parse 3930 # each INTERVAL expression into this canonical form so it's easy to transpile 3931 if this and this.is_number: 3932 this = exp.Literal.string(this.name) 3933 elif this and this.is_string: 3934 parts = this.name.split() 3935 3936 if len(parts) == 2: 3937 if unit: 3938 # This is not actually a unit, it's something else (e.g. a "window side") 3939 unit = None 3940 self._retreat(self._index - 1) 3941 3942 this = exp.Literal.string(parts[0]) 3943 unit = self.expression(exp.Var, this=parts[1].upper()) 3944 3945 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3946 unit = self.expression( 3947 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3948 ) 3949 3950 return self.expression(exp.Interval, this=this, unit=unit) 3951 3952 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3953 this = self._parse_term() 3954 3955 while True: 3956 if self._match_set(self.BITWISE): 3957 this = self.expression( 3958 self.BITWISE[self._prev.token_type], 3959 this=this, 3960 expression=self._parse_term(), 3961 ) 3962 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3963 this = self.expression( 3964 exp.DPipe, 3965 this=this, 3966 expression=self._parse_term(), 3967 safe=not self.dialect.STRICT_STRING_CONCAT, 3968 ) 3969 elif self._match(TokenType.DQMARK): 3970 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3971 elif self._match_pair(TokenType.LT, TokenType.LT): 3972 this = self.expression( 3973 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3974 ) 3975 elif self._match_pair(TokenType.GT, TokenType.GT): 3976 this = self.expression( 3977 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3978 ) 3979 else: 3980 break 3981 3982 return this 3983 3984 def _parse_term(self) -> t.Optional[exp.Expression]: 3985 return self._parse_tokens(self._parse_factor, self.TERM) 3986 3987 def _parse_factor(self) -> t.Optional[exp.Expression]: 3988 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3989 this = parse_method() 3990 3991 while self._match_set(self.FACTOR): 3992 this = self.expression( 3993 self.FACTOR[self._prev.token_type], 3994 this=this, 3995 comments=self._prev_comments, 3996 expression=parse_method(), 3997 ) 3998 if isinstance(this, exp.Div): 3999 this.args["typed"] = self.dialect.TYPED_DIVISION 4000 this.args["safe"] = self.dialect.SAFE_DIVISION 4001 4002 return this 4003 4004 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4005 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4006 4007 def _parse_unary(self) -> t.Optional[exp.Expression]: 4008 if self._match_set(self.UNARY_PARSERS): 4009 return self.UNARY_PARSERS[self._prev.token_type](self) 4010 return self._parse_at_time_zone(self._parse_type()) 4011 4012 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4013 interval = parse_interval and self._parse_interval() 4014 if interval: 4015 # Convert INTERVAL 'val_1' unit_1 [+] ... 
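# Sketch (annotation, not parser source): per the comment above, INTERVAL 5 day
# and INTERVAL '5 day' both normalize to a string literal plus an upper-cased unit.
from sqlglot import exp, parse_one

for sql in ("SELECT INTERVAL 5 day", "SELECT INTERVAL '5 day'"):
    iv = parse_one(sql).find(exp.Interval)
    print(iv.this.sql(), iv.args.get("unit"))  # '5' and Var(this=DAY) in both cases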
[+] 'val_n' unit_n into a sum of intervals 4016 while True: 4017 index = self._index 4018 self._match(TokenType.PLUS) 4019 4020 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4021 self._retreat(index) 4022 break 4023 4024 interval = self.expression( # type: ignore 4025 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4026 ) 4027 4028 return interval 4029 4030 index = self._index 4031 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4032 this = self._parse_column() 4033 4034 if data_type: 4035 if isinstance(this, exp.Literal): 4036 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4037 if parser: 4038 return parser(self, this, data_type) 4039 return self.expression(exp.Cast, this=this, to=data_type) 4040 if not data_type.expressions: 4041 self._retreat(index) 4042 return self._parse_column() 4043 return self._parse_column_ops(data_type) 4044 4045 return this and self._parse_column_ops(this) 4046 4047 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4048 this = self._parse_type() 4049 if not this: 4050 return None 4051 4052 if isinstance(this, exp.Column) and not this.table: 4053 this = exp.var(this.name.upper()) 4054 4055 return self.expression( 4056 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4057 ) 4058 4059 def _parse_types( 4060 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4061 ) -> t.Optional[exp.Expression]: 4062 index = self._index 4063 4064 prefix = self._match_text_seq("SYSUDTLIB", ".") 4065 4066 if not self._match_set(self.TYPE_TOKENS): 4067 identifier = allow_identifiers and self._parse_id_var( 4068 any_token=False, tokens=(TokenType.VAR,) 4069 ) 4070 if identifier: 4071 tokens = self.dialect.tokenize(identifier.name) 4072 4073 if len(tokens) != 1: 4074 self.raise_error("Unexpected identifier", self._prev) 4075 4076 if tokens[0].token_type in self.TYPE_TOKENS: 4077 self._prev = tokens[0] 4078 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4079 type_name = identifier.name 4080 4081 while self._match(TokenType.DOT): 4082 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4083 4084 return exp.DataType.build(type_name, udt=True) 4085 else: 4086 self._retreat(self._index - 1) 4087 return None 4088 else: 4089 return None 4090 4091 type_token = self._prev.token_type 4092 4093 if type_token == TokenType.PSEUDO_TYPE: 4094 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4095 4096 if type_token == TokenType.OBJECT_IDENTIFIER: 4097 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4098 4099 nested = type_token in self.NESTED_TYPE_TOKENS 4100 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4101 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4102 expressions = None 4103 maybe_func = False 4104 4105 if self._match(TokenType.L_PAREN): 4106 if is_struct: 4107 expressions = self._parse_csv(self._parse_struct_types) 4108 elif nested: 4109 expressions = self._parse_csv( 4110 lambda: self._parse_types( 4111 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4112 ) 4113 ) 4114 elif type_token in self.ENUM_TYPE_TOKENS: 4115 expressions = self._parse_csv(self._parse_equality) 4116 elif is_aggregate: 4117 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4118 any_token=False, tokens=(TokenType.VAR,) 4119 ) 4120 if not func_or_ident or not self._match(TokenType.COMMA): 4121 return None 4122 expressions = 
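# Sketch (annotation, not parser source): the loop above folds chained interval
# parts like INTERVAL '1' DAY '2' HOUR into an exp.Add over two intervals.
from sqlglot import exp, parse_one

add = parse_one("SELECT INTERVAL '1' DAY '2' HOUR").find(exp.Add)
print(add.this.sql(), "|", add.expression.sql())  # INTERVAL '1' DAY | INTERVAL '2' HOUR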
self._parse_csv( 4123 lambda: self._parse_types( 4124 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4125 ) 4126 ) 4127 expressions.insert(0, func_or_ident) 4128 else: 4129 expressions = self._parse_csv(self._parse_type_size) 4130 4131 if not expressions or not self._match(TokenType.R_PAREN): 4132 self._retreat(index) 4133 return None 4134 4135 maybe_func = True 4136 4137 this: t.Optional[exp.Expression] = None 4138 values: t.Optional[t.List[exp.Expression]] = None 4139 4140 if nested and self._match(TokenType.LT): 4141 if is_struct: 4142 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4143 else: 4144 expressions = self._parse_csv( 4145 lambda: self._parse_types( 4146 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4147 ) 4148 ) 4149 4150 if not self._match(TokenType.GT): 4151 self.raise_error("Expecting >") 4152 4153 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4154 values = self._parse_csv(self._parse_conjunction) 4155 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4156 4157 if type_token in self.TIMESTAMPS: 4158 if self._match_text_seq("WITH", "TIME", "ZONE"): 4159 maybe_func = False 4160 tz_type = ( 4161 exp.DataType.Type.TIMETZ 4162 if type_token in self.TIMES 4163 else exp.DataType.Type.TIMESTAMPTZ 4164 ) 4165 this = exp.DataType(this=tz_type, expressions=expressions) 4166 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4167 maybe_func = False 4168 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4169 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4170 maybe_func = False 4171 elif type_token == TokenType.INTERVAL: 4172 unit = self._parse_var(upper=True) 4173 if unit: 4174 if self._match_text_seq("TO"): 4175 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4176 4177 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4178 else: 4179 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4180 4181 if maybe_func and check_func: 4182 index2 = self._index 4183 peek = self._parse_string() 4184 4185 if not peek: 4186 self._retreat(index) 4187 return None 4188 4189 self._retreat(index2) 4190 4191 if not this: 4192 if self._match_text_seq("UNSIGNED"): 4193 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4194 if not unsigned_type_token: 4195 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4196 4197 type_token = unsigned_type_token or type_token 4198 4199 this = exp.DataType( 4200 this=exp.DataType.Type[type_token.value], 4201 expressions=expressions, 4202 nested=nested, 4203 values=values, 4204 prefix=prefix, 4205 ) 4206 4207 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4208 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4209 4210 return this 4211 4212 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4213 index = self._index 4214 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4215 self._match(TokenType.COLON) 4216 column_def = self._parse_column_def(this) 4217 4218 if type_required and ( 4219 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4220 ): 4221 self._retreat(index) 4222 return self._parse_types() 4223 4224 return column_def 4225 4226 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4227 if not 
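# Sketch (annotation, not parser source): exp.DataType.build runs this same
# _parse_types machinery for nested generics; the rendered SQL below assumes
# the BigQuery generator (which maps INT to INT64, TEXT to STRING).
from sqlglot import exp

dt = exp.DataType.build("STRUCT<a INT, b ARRAY<TEXT>>", dialect="bigquery")
print(dt.this, [e.sql("bigquery") for e in dt.expressions])  # Type.STRUCT plus its column defs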
self._match_text_seq("AT", "TIME", "ZONE"): 4228 return this 4229 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4230 4231 def _parse_column(self) -> t.Optional[exp.Expression]: 4232 this = self._parse_column_reference() 4233 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4234 4235 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4236 this = self._parse_field() 4237 if ( 4238 not this 4239 and self._match(TokenType.VALUES, advance=False) 4240 and self.VALUES_FOLLOWED_BY_PAREN 4241 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4242 ): 4243 this = self._parse_id_var() 4244 4245 if isinstance(this, exp.Identifier): 4246 # We bubble up comments from the Identifier to the Column 4247 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4248 4249 return this 4250 4251 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4252 this = self._parse_bracket(this) 4253 4254 while self._match_set(self.COLUMN_OPERATORS): 4255 op_token = self._prev.token_type 4256 op = self.COLUMN_OPERATORS.get(op_token) 4257 4258 if op_token == TokenType.DCOLON: 4259 field = self._parse_types() 4260 if not field: 4261 self.raise_error("Expected type") 4262 elif op and self._curr: 4263 field = self._parse_column_reference() 4264 else: 4265 field = self._parse_field(any_token=True, anonymous_func=True) 4266 4267 if isinstance(field, exp.Func) and this: 4268 # bigquery allows function calls like x.y.count(...) 4269 # SAFE.SUBSTR(...) 4270 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4271 this = exp.replace_tree( 4272 this, 4273 lambda n: ( 4274 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4275 if n.table 4276 else n.this 4277 ) 4278 if isinstance(n, exp.Column) 4279 else n, 4280 ) 4281 4282 if op: 4283 this = op(self, this, field) 4284 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4285 this = self.expression( 4286 exp.Column, 4287 this=field, 4288 table=this.this, 4289 db=this.args.get("table"), 4290 catalog=this.args.get("db"), 4291 ) 4292 else: 4293 this = self.expression(exp.Dot, this=this, expression=field) 4294 this = self._parse_bracket(this) 4295 return this 4296 4297 def _parse_primary(self) -> t.Optional[exp.Expression]: 4298 if self._match_set(self.PRIMARY_PARSERS): 4299 token_type = self._prev.token_type 4300 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4301 4302 if token_type == TokenType.STRING: 4303 expressions = [primary] 4304 while self._match(TokenType.STRING): 4305 expressions.append(exp.Literal.string(self._prev.text)) 4306 4307 if len(expressions) > 1: 4308 return self.expression(exp.Concat, expressions=expressions) 4309 4310 return primary 4311 4312 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4313 return exp.Literal.number(f"0.{self._prev.text}") 4314 4315 if self._match(TokenType.L_PAREN): 4316 comments = self._prev_comments 4317 query = self._parse_select() 4318 4319 if query: 4320 expressions = [query] 4321 else: 4322 expressions = self._parse_expressions() 4323 4324 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4325 4326 if isinstance(this, exp.UNWRAPPED_QUERIES): 4327 this = self._parse_set_operations( 4328 self._parse_subquery(this=this, parse_alias=False) 4329 ) 4330 elif isinstance(this, exp.Subquery): 4331 this = self._parse_subquery( 4332 this=self._parse_set_operations(this), parse_alias=False 
4333 ) 4334 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4335 this = self.expression(exp.Tuple, expressions=expressions) 4336 else: 4337 this = self.expression(exp.Paren, this=this) 4338 4339 if this: 4340 this.add_comments(comments) 4341 4342 self._match_r_paren(expression=this) 4343 return this 4344 4345 return None 4346 4347 def _parse_field( 4348 self, 4349 any_token: bool = False, 4350 tokens: t.Optional[t.Collection[TokenType]] = None, 4351 anonymous_func: bool = False, 4352 ) -> t.Optional[exp.Expression]: 4353 if anonymous_func: 4354 field = ( 4355 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4356 or self._parse_primary() 4357 ) 4358 else: 4359 field = self._parse_primary() or self._parse_function( 4360 anonymous=anonymous_func, any_token=any_token 4361 ) 4362 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4363 4364 def _parse_function( 4365 self, 4366 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4367 anonymous: bool = False, 4368 optional_parens: bool = True, 4369 any_token: bool = False, 4370 ) -> t.Optional[exp.Expression]: 4371 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4372 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4373 fn_syntax = False 4374 if ( 4375 self._match(TokenType.L_BRACE, advance=False) 4376 and self._next 4377 and self._next.text.upper() == "FN" 4378 ): 4379 self._advance(2) 4380 fn_syntax = True 4381 4382 func = self._parse_function_call( 4383 functions=functions, 4384 anonymous=anonymous, 4385 optional_parens=optional_parens, 4386 any_token=any_token, 4387 ) 4388 4389 if fn_syntax: 4390 self._match(TokenType.R_BRACE) 4391 4392 return func 4393 4394 def _parse_function_call( 4395 self, 4396 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4397 anonymous: bool = False, 4398 optional_parens: bool = True, 4399 any_token: bool = False, 4400 ) -> t.Optional[exp.Expression]: 4401 if not self._curr: 4402 return None 4403 4404 comments = self._curr.comments 4405 token_type = self._curr.token_type 4406 this = self._curr.text 4407 upper = this.upper() 4408 4409 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4410 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4411 self._advance() 4412 return self._parse_window(parser(self)) 4413 4414 if not self._next or self._next.token_type != TokenType.L_PAREN: 4415 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4416 self._advance() 4417 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4418 4419 return None 4420 4421 if any_token: 4422 if token_type in self.RESERVED_TOKENS: 4423 return None 4424 elif token_type not in self.FUNC_TOKENS: 4425 return None 4426 4427 self._advance(2) 4428 4429 parser = self.FUNCTION_PARSERS.get(upper) 4430 if parser and not anonymous: 4431 this = parser(self) 4432 else: 4433 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4434 4435 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4436 this = self.expression(subquery_predicate, this=self._parse_select()) 4437 self._match_r_paren() 4438 return this 4439 4440 if functions is None: 4441 functions = self.FUNCTIONS 4442 4443 function = functions.get(upper) 4444 4445 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4446 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4447 4448 if alias: 4449 args = self._kv_to_prop_eq(args) 4450 4451 if function and not anonymous: 4452 if 
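# Sketch (annotation, not parser source): names with no registered builder fall
# through to exp.Anonymous, preserving the original name and argument list.
from sqlglot import exp, parse_one

func = parse_one("SELECT MY_UDF(a, 1)").find(exp.Anonymous)  # MY_UDF is a made-up name
print(func.this, len(func.expressions))  # MY_UDF 2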
"dialect" in function.__code__.co_varnames: 4453 func = function(args, dialect=self.dialect) 4454 else: 4455 func = function(args) 4456 4457 func = self.validate_expression(func, args) 4458 if not self.dialect.NORMALIZE_FUNCTIONS: 4459 func.meta["name"] = this 4460 4461 this = func 4462 else: 4463 if token_type == TokenType.IDENTIFIER: 4464 this = exp.Identifier(this=this, quoted=True) 4465 this = self.expression(exp.Anonymous, this=this, expressions=args) 4466 4467 if isinstance(this, exp.Expression): 4468 this.add_comments(comments) 4469 4470 self._match_r_paren(this) 4471 return self._parse_window(this) 4472 4473 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4474 transformed = [] 4475 4476 for e in expressions: 4477 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4478 if isinstance(e, exp.Alias): 4479 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4480 4481 if not isinstance(e, exp.PropertyEQ): 4482 e = self.expression( 4483 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4484 ) 4485 4486 if isinstance(e.this, exp.Column): 4487 e.this.replace(e.this.this) 4488 4489 transformed.append(e) 4490 4491 return transformed 4492 4493 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4494 return self._parse_column_def(self._parse_id_var()) 4495 4496 def _parse_user_defined_function( 4497 self, kind: t.Optional[TokenType] = None 4498 ) -> t.Optional[exp.Expression]: 4499 this = self._parse_id_var() 4500 4501 while self._match(TokenType.DOT): 4502 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4503 4504 if not self._match(TokenType.L_PAREN): 4505 return this 4506 4507 expressions = self._parse_csv(self._parse_function_parameter) 4508 self._match_r_paren() 4509 return self.expression( 4510 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4511 ) 4512 4513 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4514 literal = self._parse_primary() 4515 if literal: 4516 return self.expression(exp.Introducer, this=token.text, expression=literal) 4517 4518 return self.expression(exp.Identifier, this=token.text) 4519 4520 def _parse_session_parameter(self) -> exp.SessionParameter: 4521 kind = None 4522 this = self._parse_id_var() or self._parse_primary() 4523 4524 if this and self._match(TokenType.DOT): 4525 kind = this.name 4526 this = self._parse_var() or self._parse_primary() 4527 4528 return self.expression(exp.SessionParameter, this=this, kind=kind) 4529 4530 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4531 index = self._index 4532 4533 if self._match(TokenType.L_PAREN): 4534 expressions = t.cast( 4535 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4536 ) 4537 4538 if not self._match(TokenType.R_PAREN): 4539 self._retreat(index) 4540 else: 4541 expressions = [self._parse_id_var()] 4542 4543 if self._match_set(self.LAMBDAS): 4544 return self.LAMBDAS[self._prev.token_type](self, expressions) 4545 4546 self._retreat(index) 4547 4548 this: t.Optional[exp.Expression] 4549 4550 if self._match(TokenType.DISTINCT): 4551 this = self.expression( 4552 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4553 ) 4554 else: 4555 this = self._parse_select_or_expression(alias=alias) 4556 4557 return self._parse_limit( 4558 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4559 ) 4560 4561 def _parse_schema(self, this: 
t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4562 index = self._index 4563 if not self._match(TokenType.L_PAREN): 4564 return this 4565 4566 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4567 # expr can be of both types 4568 if self._match_set(self.SELECT_START_TOKENS): 4569 self._retreat(index) 4570 return this 4571 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4572 self._match_r_paren() 4573 return self.expression(exp.Schema, this=this, expressions=args) 4574 4575 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4576 return self._parse_column_def(self._parse_field(any_token=True)) 4577 4578 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4579 # column defs are not really columns, they're identifiers 4580 if isinstance(this, exp.Column): 4581 this = this.this 4582 4583 kind = self._parse_types(schema=True) 4584 4585 if self._match_text_seq("FOR", "ORDINALITY"): 4586 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4587 4588 constraints: t.List[exp.Expression] = [] 4589 4590 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4591 ("ALIAS", "MATERIALIZED") 4592 ): 4593 persisted = self._prev.text.upper() == "MATERIALIZED" 4594 constraints.append( 4595 self.expression( 4596 exp.ComputedColumnConstraint, 4597 this=self._parse_conjunction(), 4598 persisted=persisted or self._match_text_seq("PERSISTED"), 4599 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4600 ) 4601 ) 4602 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4603 self._match(TokenType.ALIAS) 4604 constraints.append( 4605 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4606 ) 4607 4608 while True: 4609 constraint = self._parse_column_constraint() 4610 if not constraint: 4611 break 4612 constraints.append(constraint) 4613 4614 if not kind and not constraints: 4615 return this 4616 4617 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4618 4619 def _parse_auto_increment( 4620 self, 4621 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4622 start = None 4623 increment = None 4624 4625 if self._match(TokenType.L_PAREN, advance=False): 4626 args = self._parse_wrapped_csv(self._parse_bitwise) 4627 start = seq_get(args, 0) 4628 increment = seq_get(args, 1) 4629 elif self._match_text_seq("START"): 4630 start = self._parse_bitwise() 4631 self._match_text_seq("INCREMENT") 4632 increment = self._parse_bitwise() 4633 4634 if start and increment: 4635 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4636 4637 return exp.AutoIncrementColumnConstraint() 4638 4639 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4640 if not self._match_text_seq("REFRESH"): 4641 self._retreat(self._index - 1) 4642 return None 4643 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4644 4645 def _parse_compress(self) -> exp.CompressColumnConstraint: 4646 if self._match(TokenType.L_PAREN, advance=False): 4647 return self.expression( 4648 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4649 ) 4650 4651 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4652 4653 def _parse_generated_as_identity( 4654 self, 4655 ) -> ( 4656 exp.GeneratedAsIdentityColumnConstraint 4657 | exp.ComputedColumnConstraint 4658 | 
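# Sketch (annotation, not parser source): _parse_column_def attaches the parsed
# type and any trailing constraints to an exp.ColumnDef.
from sqlglot import exp, parse_one

col = parse_one("CREATE TABLE t (id INT NOT NULL DEFAULT 0)").find(exp.ColumnDef)
print(col.args.get("kind"), [c.sql() for c in col.args.get("constraints", [])])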
exp.GeneratedAsRowColumnConstraint 4659 ): 4660 if self._match_text_seq("BY", "DEFAULT"): 4661 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4662 this = self.expression( 4663 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4664 ) 4665 else: 4666 self._match_text_seq("ALWAYS") 4667 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4668 4669 self._match(TokenType.ALIAS) 4670 4671 if self._match_text_seq("ROW"): 4672 start = self._match_text_seq("START") 4673 if not start: 4674 self._match(TokenType.END) 4675 hidden = self._match_text_seq("HIDDEN") 4676 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4677 4678 identity = self._match_text_seq("IDENTITY") 4679 4680 if self._match(TokenType.L_PAREN): 4681 if self._match(TokenType.START_WITH): 4682 this.set("start", self._parse_bitwise()) 4683 if self._match_text_seq("INCREMENT", "BY"): 4684 this.set("increment", self._parse_bitwise()) 4685 if self._match_text_seq("MINVALUE"): 4686 this.set("minvalue", self._parse_bitwise()) 4687 if self._match_text_seq("MAXVALUE"): 4688 this.set("maxvalue", self._parse_bitwise()) 4689 4690 if self._match_text_seq("CYCLE"): 4691 this.set("cycle", True) 4692 elif self._match_text_seq("NO", "CYCLE"): 4693 this.set("cycle", False) 4694 4695 if not identity: 4696 this.set("expression", self._parse_range()) 4697 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4698 args = self._parse_csv(self._parse_bitwise) 4699 this.set("start", seq_get(args, 0)) 4700 this.set("increment", seq_get(args, 1)) 4701 4702 self._match_r_paren() 4703 4704 return this 4705 4706 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4707 self._match_text_seq("LENGTH") 4708 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4709 4710 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4711 if self._match_text_seq("NULL"): 4712 return self.expression(exp.NotNullColumnConstraint) 4713 if self._match_text_seq("CASESPECIFIC"): 4714 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4715 if self._match_text_seq("FOR", "REPLICATION"): 4716 return self.expression(exp.NotForReplicationColumnConstraint) 4717 return None 4718 4719 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4720 if self._match(TokenType.CONSTRAINT): 4721 this = self._parse_id_var() 4722 else: 4723 this = None 4724 4725 if self._match_texts(self.CONSTRAINT_PARSERS): 4726 return self.expression( 4727 exp.ColumnConstraint, 4728 this=this, 4729 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4730 ) 4731 4732 return this 4733 4734 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4735 if not self._match(TokenType.CONSTRAINT): 4736 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4737 4738 return self.expression( 4739 exp.Constraint, 4740 this=self._parse_id_var(), 4741 expressions=self._parse_unnamed_constraints(), 4742 ) 4743 4744 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4745 constraints = [] 4746 while True: 4747 constraint = self._parse_unnamed_constraint() or self._parse_function() 4748 if not constraint: 4749 break 4750 constraints.append(constraint) 4751 4752 return constraints 4753 4754 def _parse_unnamed_constraint( 4755 self, constraints: t.Optional[t.Collection[str]] = None 4756 ) -> t.Optional[exp.Expression]: 4757 if self._match(TokenType.IDENTIFIER, advance=False) or not 
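# Sketch (annotation, not parser source): identity options such as START WITH /
# INCREMENT BY land on exp.GeneratedAsIdentityColumnConstraint.
from sqlglot import exp, parse_one

gen = parse_one(
    "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
).find(exp.GeneratedAsIdentityColumnConstraint)
print(gen.args.get("start"), gen.args.get("increment"))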
self._match_texts( 4758 constraints or self.CONSTRAINT_PARSERS 4759 ): 4760 return None 4761 4762 constraint = self._prev.text.upper() 4763 if constraint not in self.CONSTRAINT_PARSERS: 4764 self.raise_error(f"No parser found for schema constraint {constraint}.") 4765 4766 return self.CONSTRAINT_PARSERS[constraint](self) 4767 4768 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4769 self._match_text_seq("KEY") 4770 return self.expression( 4771 exp.UniqueColumnConstraint, 4772 this=self._parse_schema(self._parse_id_var(any_token=False)), 4773 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4774 on_conflict=self._parse_on_conflict(), 4775 ) 4776 4777 def _parse_key_constraint_options(self) -> t.List[str]: 4778 options = [] 4779 while True: 4780 if not self._curr: 4781 break 4782 4783 if self._match(TokenType.ON): 4784 action = None 4785 on = self._advance_any() and self._prev.text 4786 4787 if self._match_text_seq("NO", "ACTION"): 4788 action = "NO ACTION" 4789 elif self._match_text_seq("CASCADE"): 4790 action = "CASCADE" 4791 elif self._match_text_seq("RESTRICT"): 4792 action = "RESTRICT" 4793 elif self._match_pair(TokenType.SET, TokenType.NULL): 4794 action = "SET NULL" 4795 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4796 action = "SET DEFAULT" 4797 else: 4798 self.raise_error("Invalid key constraint") 4799 4800 options.append(f"ON {on} {action}") 4801 elif self._match_text_seq("NOT", "ENFORCED"): 4802 options.append("NOT ENFORCED") 4803 elif self._match_text_seq("DEFERRABLE"): 4804 options.append("DEFERRABLE") 4805 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4806 options.append("INITIALLY DEFERRED") 4807 elif self._match_text_seq("NORELY"): 4808 options.append("NORELY") 4809 elif self._match_text_seq("MATCH", "FULL"): 4810 options.append("MATCH FULL") 4811 else: 4812 break 4813 4814 return options 4815 4816 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4817 if match and not self._match(TokenType.REFERENCES): 4818 return None 4819 4820 expressions = None 4821 this = self._parse_table(schema=True) 4822 options = self._parse_key_constraint_options() 4823 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4824 4825 def _parse_foreign_key(self) -> exp.ForeignKey: 4826 expressions = self._parse_wrapped_id_vars() 4827 reference = self._parse_references() 4828 options = {} 4829 4830 while self._match(TokenType.ON): 4831 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4832 self.raise_error("Expected DELETE or UPDATE") 4833 4834 kind = self._prev.text.lower() 4835 4836 if self._match_text_seq("NO", "ACTION"): 4837 action = "NO ACTION" 4838 elif self._match(TokenType.SET): 4839 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4840 action = "SET " + self._prev.text.upper() 4841 else: 4842 self._advance() 4843 action = self._prev.text.upper() 4844 4845 options[kind] = action 4846 4847 return self.expression( 4848 exp.ForeignKey, 4849 expressions=expressions, 4850 reference=reference, 4851 **options, # type: ignore 4852 ) 4853 4854 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4855 return self._parse_field() 4856 4857 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4858 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4859 self._retreat(self._index - 1) 4860 return None 4861 4862 id_vars = self._parse_wrapped_id_vars() 4863 return self.expression( 4864 exp.PeriodForSystemTimeConstraint, 
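# Sketch (annotation, not parser source): _parse_key_constraint_options consumes
# ON DELETE/UPDATE actions, so they typically surface on the Reference's options
# list; exact placement can differ between sqlglot versions.
from sqlglot import exp, parse_one

ast = parse_one("CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES p (id) ON DELETE CASCADE)")
print(ast.find(exp.Reference).args.get("options"))  # e.g. ['ON DELETE CASCADE']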
4865 this=seq_get(id_vars, 0), 4866 expression=seq_get(id_vars, 1), 4867 ) 4868 4869 def _parse_primary_key( 4870 self, wrapped_optional: bool = False, in_props: bool = False 4871 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4872 desc = ( 4873 self._match_set((TokenType.ASC, TokenType.DESC)) 4874 and self._prev.token_type == TokenType.DESC 4875 ) 4876 4877 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4878 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4879 4880 expressions = self._parse_wrapped_csv( 4881 self._parse_primary_key_part, optional=wrapped_optional 4882 ) 4883 options = self._parse_key_constraint_options() 4884 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4885 4886 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4887 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4888 4889 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4890 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4891 return this 4892 4893 bracket_kind = self._prev.token_type 4894 expressions = self._parse_csv( 4895 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4896 ) 4897 4898 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4899 self.raise_error("Expected ]") 4900 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4901 self.raise_error("Expected }") 4902 4903 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4904 if bracket_kind == TokenType.L_BRACE: 4905 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4906 elif not this or this.name.upper() == "ARRAY": 4907 this = self.expression(exp.Array, expressions=expressions) 4908 else: 4909 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4910 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4911 4912 self._add_comments(this) 4913 return self._parse_bracket(this) 4914 4915 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4916 if self._match(TokenType.COLON): 4917 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4918 return this 4919 4920 def _parse_case(self) -> t.Optional[exp.Expression]: 4921 ifs = [] 4922 default = None 4923 4924 comments = self._prev_comments 4925 expression = self._parse_conjunction() 4926 4927 while self._match(TokenType.WHEN): 4928 this = self._parse_conjunction() 4929 self._match(TokenType.THEN) 4930 then = self._parse_conjunction() 4931 ifs.append(self.expression(exp.If, this=this, true=then)) 4932 4933 if self._match(TokenType.ELSE): 4934 default = self._parse_conjunction() 4935 4936 if not self._match(TokenType.END): 4937 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4938 default = exp.column("interval") 4939 else: 4940 self.raise_error("Expected END after CASE", self._prev) 4941 4942 return self.expression( 4943 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4944 ) 4945 4946 def _parse_if(self) -> t.Optional[exp.Expression]: 4947 if self._match(TokenType.L_PAREN): 4948 args = self._parse_csv(self._parse_conjunction) 4949 this = self.validate_expression(exp.If.from_arg_list(args), args) 4950 self._match_r_paren() 4951 else: 4952 index = self._index - 1 4953 4954 if self.NO_PAREN_IF_COMMANDS and 
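# Sketch (annotation, not parser source): per the DuckDB link above, {'k': v}
# braces become exp.Struct, while name[...] becomes exp.Bracket with
# dialect-aware index offsets.
from sqlglot import exp, parse_one

print(parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct) is not None)   # True
print(parse_one("SELECT arr[1]", read="duckdb").find(exp.Bracket).sql("duckdb"))  # arr[1]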
index == 0: 4955 return self._parse_as_command(self._prev) 4956 4957 condition = self._parse_conjunction() 4958 4959 if not condition: 4960 self._retreat(index) 4961 return None 4962 4963 self._match(TokenType.THEN) 4964 true = self._parse_conjunction() 4965 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4966 self._match(TokenType.END) 4967 this = self.expression(exp.If, this=condition, true=true, false=false) 4968 4969 return this 4970 4971 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4972 if not self._match_text_seq("VALUE", "FOR"): 4973 self._retreat(self._index - 1) 4974 return None 4975 4976 return self.expression( 4977 exp.NextValueFor, 4978 this=self._parse_column(), 4979 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4980 ) 4981 4982 def _parse_extract(self) -> exp.Extract: 4983 this = self._parse_function() or self._parse_var() or self._parse_type() 4984 4985 if self._match(TokenType.FROM): 4986 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4987 4988 if not self._match(TokenType.COMMA): 4989 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4990 4991 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4992 4993 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4994 this = self._parse_conjunction() 4995 4996 if not self._match(TokenType.ALIAS): 4997 if self._match(TokenType.COMMA): 4998 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4999 5000 self.raise_error("Expected AS after CAST") 5001 5002 fmt = None 5003 to = self._parse_types() 5004 5005 if self._match(TokenType.FORMAT): 5006 fmt_string = self._parse_string() 5007 fmt = self._parse_at_time_zone(fmt_string) 5008 5009 if not to: 5010 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5011 if to.this in exp.DataType.TEMPORAL_TYPES: 5012 this = self.expression( 5013 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5014 this=this, 5015 format=exp.Literal.string( 5016 format_time( 5017 fmt_string.this if fmt_string else "", 5018 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5019 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5020 ) 5021 ), 5022 ) 5023 5024 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5025 this.set("zone", fmt.args["zone"]) 5026 return this 5027 elif not to: 5028 self.raise_error("Expected TYPE after CAST") 5029 elif isinstance(to, exp.Identifier): 5030 to = exp.DataType.build(to.name, udt=True) 5031 elif to.this == exp.DataType.Type.CHAR: 5032 if self._match(TokenType.CHARACTER_SET): 5033 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5034 5035 return self.expression( 5036 exp.Cast if strict else exp.TryCast, 5037 this=this, 5038 to=to, 5039 format=fmt, 5040 safe=safe, 5041 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5042 ) 5043 5044 def _parse_string_agg(self) -> exp.Expression: 5045 if self._match(TokenType.DISTINCT): 5046 args: t.List[t.Optional[exp.Expression]] = [ 5047 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5048 ] 5049 if self._match(TokenType.COMMA): 5050 args.extend(self._parse_csv(self._parse_conjunction)) 5051 else: 5052 args = self._parse_csv(self._parse_conjunction) # type: ignore 5053 5054 index = self._index 5055 if not self._match(TokenType.R_PAREN) and args: 5056 # postgres: STRING_AGG([DISTINCT] expression, separator 
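# Sketch (annotation, not parser source): both EXTRACT(part FROM expr) and the
# comma form collapse to the same exp.Extract shape.
from sqlglot import exp, parse_one

ext = parse_one("SELECT EXTRACT(YEAR FROM d)").find(exp.Extract)
print(ext.this.sql(), ext.expression.sql())  # YEAR d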
[ORDER BY expression1 {ASC | DESC} [, ...]]) 5057 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5058 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5059 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5060 5061 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5062 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5063 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5064 if not self._match_text_seq("WITHIN", "GROUP"): 5065 self._retreat(index) 5066 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5067 5068 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5069 order = self._parse_order(this=seq_get(args, 0)) 5070 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5071 5072 def _parse_convert( 5073 self, strict: bool, safe: t.Optional[bool] = None 5074 ) -> t.Optional[exp.Expression]: 5075 this = self._parse_bitwise() 5076 5077 if self._match(TokenType.USING): 5078 to: t.Optional[exp.Expression] = self.expression( 5079 exp.CharacterSet, this=self._parse_var() 5080 ) 5081 elif self._match(TokenType.COMMA): 5082 to = self._parse_types() 5083 else: 5084 to = None 5085 5086 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5087 5088 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5089 """ 5090 There are generally two variants of the DECODE function: 5091 5092 - DECODE(bin, charset) 5093 - DECODE(expression, search, result [, search, result] ... [, default]) 5094 5095 The second variant will always be parsed into a CASE expression. Note that NULL 5096 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5097 instead of relying on pattern matching. 
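# Sketch (annotation, not parser source): normalizing STRING_AGG into
# exp.GroupConcat is what makes transpilation to MySQL straightforward.
import sqlglot

print(sqlglot.transpile("SELECT STRING_AGG(x, ',')", read="postgres", write="mysql")[0])
# e.g. SELECT GROUP_CONCAT(x SEPARATOR ',')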
5098 """ 5099 args = self._parse_csv(self._parse_conjunction) 5100 5101 if len(args) < 3: 5102 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5103 5104 expression, *expressions = args 5105 if not expression: 5106 return None 5107 5108 ifs = [] 5109 for search, result in zip(expressions[::2], expressions[1::2]): 5110 if not search or not result: 5111 return None 5112 5113 if isinstance(search, exp.Literal): 5114 ifs.append( 5115 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5116 ) 5117 elif isinstance(search, exp.Null): 5118 ifs.append( 5119 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5120 ) 5121 else: 5122 cond = exp.or_( 5123 exp.EQ(this=expression.copy(), expression=search), 5124 exp.and_( 5125 exp.Is(this=expression.copy(), expression=exp.Null()), 5126 exp.Is(this=search.copy(), expression=exp.Null()), 5127 copy=False, 5128 ), 5129 copy=False, 5130 ) 5131 ifs.append(exp.If(this=cond, true=result)) 5132 5133 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5134 5135 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5136 self._match_text_seq("KEY") 5137 key = self._parse_column() 5138 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5139 self._match_text_seq("VALUE") 5140 value = self._parse_bitwise() 5141 5142 if not key and not value: 5143 return None 5144 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5145 5146 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5147 if not this or not self._match_text_seq("FORMAT", "JSON"): 5148 return this 5149 5150 return self.expression(exp.FormatJson, this=this) 5151 5152 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5153 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5154 for value in values: 5155 if self._match_text_seq(value, "ON", on): 5156 return f"{value} ON {on}" 5157 5158 return None 5159 5160 @t.overload 5161 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5162 5163 @t.overload 5164 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
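# Sketch (annotation, not parser source): the second DECODE variant described in
# the docstring expands to a CASE, with NULL searches compiled to IS NULL checks.
from sqlglot import parse_one

print(parse_one("SELECT DECODE(x, 1, 'one', NULL, 'none', 'other')", read="oracle").sql())
# e.g. SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END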
5165 5166 def _parse_json_object(self, agg=False): 5167 star = self._parse_star() 5168 expressions = ( 5169 [star] 5170 if star 5171 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5172 ) 5173 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5174 5175 unique_keys = None 5176 if self._match_text_seq("WITH", "UNIQUE"): 5177 unique_keys = True 5178 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5179 unique_keys = False 5180 5181 self._match_text_seq("KEYS") 5182 5183 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5184 self._parse_type() 5185 ) 5186 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5187 5188 return self.expression( 5189 exp.JSONObjectAgg if agg else exp.JSONObject, 5190 expressions=expressions, 5191 null_handling=null_handling, 5192 unique_keys=unique_keys, 5193 return_type=return_type, 5194 encoding=encoding, 5195 ) 5196 5197 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5198 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5199 if not self._match_text_seq("NESTED"): 5200 this = self._parse_id_var() 5201 kind = self._parse_types(allow_identifiers=False) 5202 nested = None 5203 else: 5204 this = None 5205 kind = None 5206 nested = True 5207 5208 path = self._match_text_seq("PATH") and self._parse_string() 5209 nested_schema = nested and self._parse_json_schema() 5210 5211 return self.expression( 5212 exp.JSONColumnDef, 5213 this=this, 5214 kind=kind, 5215 path=path, 5216 nested_schema=nested_schema, 5217 ) 5218 5219 def _parse_json_schema(self) -> exp.JSONSchema: 5220 self._match_text_seq("COLUMNS") 5221 return self.expression( 5222 exp.JSONSchema, 5223 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5224 ) 5225 5226 def _parse_json_table(self) -> exp.JSONTable: 5227 this = self._parse_format_json(self._parse_bitwise()) 5228 path = self._match(TokenType.COMMA) and self._parse_string() 5229 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5230 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5231 schema = self._parse_json_schema() 5232 5233 return exp.JSONTable( 5234 this=this, 5235 schema=schema, 5236 path=path, 5237 error_handling=error_handling, 5238 empty_handling=empty_handling, 5239 ) 5240 5241 def _parse_match_against(self) -> exp.MatchAgainst: 5242 expressions = self._parse_csv(self._parse_column) 5243 5244 self._match_text_seq(")", "AGAINST", "(") 5245 5246 this = self._parse_string() 5247 5248 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5249 modifier = "IN NATURAL LANGUAGE MODE" 5250 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5251 modifier = f"{modifier} WITH QUERY EXPANSION" 5252 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5253 modifier = "IN BOOLEAN MODE" 5254 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5255 modifier = "WITH QUERY EXPANSION" 5256 else: 5257 modifier = None 5258 5259 return self.expression( 5260 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5261 ) 5262 5263 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5264 def _parse_open_json(self) -> exp.OpenJSON: 5265 this = self._parse_bitwise() 5266 path = self._match(TokenType.COMMA) and self._parse_string() 5267 5268 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5269 this = self._parse_field(any_token=True) 5270 kind = self._parse_types() 5271 path = 
self._parse_string() 5272 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5273 5274 return self.expression( 5275 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5276 ) 5277 5278 expressions = None 5279 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5280 self._match_l_paren() 5281 expressions = self._parse_csv(_parse_open_json_column_def) 5282 5283 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5284 5285 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5286 args = self._parse_csv(self._parse_bitwise) 5287 5288 if self._match(TokenType.IN): 5289 return self.expression( 5290 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5291 ) 5292 5293 if haystack_first: 5294 haystack = seq_get(args, 0) 5295 needle = seq_get(args, 1) 5296 else: 5297 needle = seq_get(args, 0) 5298 haystack = seq_get(args, 1) 5299 5300 return self.expression( 5301 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5302 ) 5303 5304 def _parse_predict(self) -> exp.Predict: 5305 self._match_text_seq("MODEL") 5306 this = self._parse_table() 5307 5308 self._match(TokenType.COMMA) 5309 self._match_text_seq("TABLE") 5310 5311 return self.expression( 5312 exp.Predict, 5313 this=this, 5314 expression=self._parse_table(), 5315 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5316 ) 5317 5318 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5319 args = self._parse_csv(self._parse_table) 5320 return exp.JoinHint(this=func_name.upper(), expressions=args) 5321 5322 def _parse_substring(self) -> exp.Substring: 5323 # Postgres supports the form: substring(string [from int] [for int]) 5324 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5325 5326 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5327 5328 if self._match(TokenType.FROM): 5329 args.append(self._parse_bitwise()) 5330 if self._match(TokenType.FOR): 5331 if len(args) == 1: 5332 args.append(exp.Literal.number(1)) 5333 args.append(self._parse_bitwise()) 5334 5335 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5336 5337 def _parse_trim(self) -> exp.Trim: 5338 # https://www.w3resource.com/sql/character-functions/trim.php 5339 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5340 5341 position = None 5342 collation = None 5343 expression = None 5344 5345 if self._match_texts(self.TRIM_TYPES): 5346 position = self._prev.text.upper() 5347 5348 this = self._parse_bitwise() 5349 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5350 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5351 expression = self._parse_bitwise() 5352 5353 if invert_order: 5354 this, expression = expression, this 5355 5356 if self._match(TokenType.COLLATE): 5357 collation = self._parse_bitwise() 5358 5359 return self.expression( 5360 exp.Trim, this=this, position=position, expression=expression, collation=collation 5361 ) 5362 5363 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5364 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5365 5366 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5367 return self._parse_window(self._parse_id_var(), alias=True) 5368 5369 def _parse_respect_or_ignore_nulls( 5370 self, this: t.Optional[exp.Expression] 5371 ) -> t.Optional[exp.Expression]: 5372 if self._match_text_seq("IGNORE", "NULLS"): 
5373 return self.expression(exp.IgnoreNulls, this=this) 5374 if self._match_text_seq("RESPECT", "NULLS"): 5375 return self.expression(exp.RespectNulls, this=this) 5376 return this 5377 5378 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5379 if self._match(TokenType.HAVING): 5380 self._match_texts(("MAX", "MIN")) 5381 max = self._prev.text.upper() != "MIN" 5382 return self.expression( 5383 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5384 ) 5385 5386 return this 5387 5388 def _parse_window( 5389 self, this: t.Optional[exp.Expression], alias: bool = False 5390 ) -> t.Optional[exp.Expression]: 5391 func = this 5392 comments = func.comments if isinstance(func, exp.Expression) else None 5393 5394 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5395 self._match(TokenType.WHERE) 5396 this = self.expression( 5397 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5398 ) 5399 self._match_r_paren() 5400 5401 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5402 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5403 if self._match_text_seq("WITHIN", "GROUP"): 5404 order = self._parse_wrapped(self._parse_order) 5405 this = self.expression(exp.WithinGroup, this=this, expression=order) 5406 5407 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER clause. 5408 # Some dialects choose to implement it and some do not. 5409 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5410 5411 # There is some code above in _parse_lambda that handles 5412 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5413 5414 # The code below handles 5415 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5416 5417 # Oracle allows both formats 5418 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5419 # and Snowflake chose to do the same for familiarity 5420 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5421 if isinstance(this, exp.AggFunc): 5422 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5423 5424 if ignore_respect and ignore_respect is not this: 5425 ignore_respect.replace(ignore_respect.this) 5426 this = self.expression(ignore_respect.__class__, this=this) 5427 5428 this = self._parse_respect_or_ignore_nulls(this) 5429 5430 # BigQuery named windows: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
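        # Illustrative note (not from the source): for a named-window clause such as
        #   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)
        # _parse_named_window invokes this method with alias=True, so the branch below
        # consumes AS instead of an OVER token and parses the parenthesized spec as
        # the definition of the window "w".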
5431 if alias: 5432 over = None 5433 self._match(TokenType.ALIAS) 5434 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5435 return this 5436 else: 5437 over = self._prev.text.upper() 5438 5439 if comments and isinstance(func, exp.Expression): 5440 func.pop_comments() 5441 5442 if not self._match(TokenType.L_PAREN): 5443 return self.expression( 5444 exp.Window, 5445 comments=comments, 5446 this=this, 5447 alias=self._parse_id_var(False), 5448 over=over, 5449 ) 5450 5451 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5452 5453 first = self._match(TokenType.FIRST) 5454 if self._match_text_seq("LAST"): 5455 first = False 5456 5457 partition, order = self._parse_partition_and_order() 5458 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5459 5460 if kind: 5461 self._match(TokenType.BETWEEN) 5462 start = self._parse_window_spec() 5463 self._match(TokenType.AND) 5464 end = self._parse_window_spec() 5465 5466 spec = self.expression( 5467 exp.WindowSpec, 5468 kind=kind, 5469 start=start["value"], 5470 start_side=start["side"], 5471 end=end["value"], 5472 end_side=end["side"], 5473 ) 5474 else: 5475 spec = None 5476 5477 self._match_r_paren() 5478 5479 window = self.expression( 5480 exp.Window, 5481 comments=comments, 5482 this=this, 5483 partition_by=partition, 5484 order=order, 5485 spec=spec, 5486 alias=window_alias, 5487 over=over, 5488 first=first, 5489 ) 5490 5491 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5492 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5493 return self._parse_window(window, alias=alias) 5494 5495 return window 5496 5497 def _parse_partition_and_order( 5498 self, 5499 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5500 return self._parse_partition_by(), self._parse_order() 5501 5502 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5503 self._match(TokenType.BETWEEN) 5504 5505 return { 5506 "value": ( 5507 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5508 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5509 or self._parse_bitwise() 5510 ), 5511 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5512 } 5513 5514 def _parse_alias( 5515 self, this: t.Optional[exp.Expression], explicit: bool = False 5516 ) -> t.Optional[exp.Expression]: 5517 any_token = self._match(TokenType.ALIAS) 5518 comments = self._prev_comments or [] 5519 5520 if explicit and not any_token: 5521 return this 5522 5523 if self._match(TokenType.L_PAREN): 5524 aliases = self.expression( 5525 exp.Aliases, 5526 comments=comments, 5527 this=this, 5528 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5529 ) 5530 self._match_r_paren(aliases) 5531 return aliases 5532 5533 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5534 self.STRING_ALIASES and self._parse_string_as_identifier() 5535 ) 5536 5537 if alias: 5538 comments.extend(alias.pop_comments()) 5539 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5540 column = this.this 5541 5542 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5543 if not this.comments and column and column.comments: 5544 this.comments = column.pop_comments() 5545 5546 return this 5547 5548 def _parse_id_var( 5549 self, 5550 any_token: bool = True, 5551 tokens: t.Optional[t.Collection[TokenType]] = None, 5552 ) -> t.Optional[exp.Expression]: 5553 expression = self._parse_identifier() 5554 if 
not expression and ( 5555 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5556 ): 5557 quoted = self._prev.token_type == TokenType.STRING 5558 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5559 5560 return expression 5561 5562 def _parse_string(self) -> t.Optional[exp.Expression]: 5563 if self._match_set(self.STRING_PARSERS): 5564 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5565 return self._parse_placeholder() 5566 5567 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5568 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5569 5570 def _parse_number(self) -> t.Optional[exp.Expression]: 5571 if self._match_set(self.NUMERIC_PARSERS): 5572 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5573 return self._parse_placeholder() 5574 5575 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5576 if self._match(TokenType.IDENTIFIER): 5577 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5578 return self._parse_placeholder() 5579 5580 def _parse_var( 5581 self, 5582 any_token: bool = False, 5583 tokens: t.Optional[t.Collection[TokenType]] = None, 5584 upper: bool = False, 5585 ) -> t.Optional[exp.Expression]: 5586 if ( 5587 (any_token and self._advance_any()) 5588 or self._match(TokenType.VAR) 5589 or (self._match_set(tokens) if tokens else False) 5590 ): 5591 return self.expression( 5592 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5593 ) 5594 return self._parse_placeholder() 5595 5596 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5597 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5598 self._advance() 5599 return self._prev 5600 return None 5601 5602 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5603 return self._parse_var() or self._parse_string() 5604 5605 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5606 return self._parse_primary() or self._parse_var(any_token=True) 5607 5608 def _parse_null(self) -> t.Optional[exp.Expression]: 5609 if self._match_set(self.NULL_TOKENS): 5610 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5611 return self._parse_placeholder() 5612 5613 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5614 if self._match(TokenType.TRUE): 5615 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5616 if self._match(TokenType.FALSE): 5617 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5618 return self._parse_placeholder() 5619 5620 def _parse_star(self) -> t.Optional[exp.Expression]: 5621 if self._match(TokenType.STAR): 5622 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5623 return self._parse_placeholder() 5624 5625 def _parse_parameter(self) -> exp.Parameter: 5626 self._match(TokenType.L_BRACE) 5627 this = self._parse_identifier() or self._parse_primary_or_var() 5628 expression = self._match(TokenType.COLON) and ( 5629 self._parse_identifier() or self._parse_primary_or_var() 5630 ) 5631 self._match(TokenType.R_BRACE) 5632 return self.expression(exp.Parameter, this=this, expression=expression) 5633 5634 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5635 if self._match_set(self.PLACEHOLDER_PARSERS): 5636 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5637 if placeholder: 5638 return placeholder 5639 self._advance(-1) 5640 return None 5641 5642 def 
_parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5643 if not self._match(TokenType.EXCEPT): 5644 return None 5645 if self._match(TokenType.L_PAREN, advance=False): 5646 return self._parse_wrapped_csv(self._parse_column) 5647 5648 except_column = self._parse_column() 5649 return [except_column] if except_column else None 5650 5651 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5652 if not self._match(TokenType.REPLACE): 5653 return None 5654 if self._match(TokenType.L_PAREN, advance=False): 5655 return self._parse_wrapped_csv(self._parse_expression) 5656 5657 replace_expression = self._parse_expression() 5658 return [replace_expression] if replace_expression else None 5659 5660 def _parse_csv( 5661 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5662 ) -> t.List[exp.Expression]: 5663 parse_result = parse_method() 5664 items = [parse_result] if parse_result is not None else [] 5665 5666 while self._match(sep): 5667 self._add_comments(parse_result) 5668 parse_result = parse_method() 5669 if parse_result is not None: 5670 items.append(parse_result) 5671 5672 return items 5673 5674 def _parse_tokens( 5675 self, parse_method: t.Callable, expressions: t.Dict 5676 ) -> t.Optional[exp.Expression]: 5677 this = parse_method() 5678 5679 while self._match_set(expressions): 5680 this = self.expression( 5681 expressions[self._prev.token_type], 5682 this=this, 5683 comments=self._prev_comments, 5684 expression=parse_method(), 5685 ) 5686 5687 return this 5688 5689 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5690 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5691 5692 def _parse_wrapped_csv( 5693 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5694 ) -> t.List[exp.Expression]: 5695 return self._parse_wrapped( 5696 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5697 ) 5698 5699 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5700 wrapped = self._match(TokenType.L_PAREN) 5701 if not wrapped and not optional: 5702 self.raise_error("Expecting (") 5703 parse_result = parse_method() 5704 if wrapped: 5705 self._match_r_paren() 5706 return parse_result 5707 5708 def _parse_expressions(self) -> t.List[exp.Expression]: 5709 return self._parse_csv(self._parse_expression) 5710 5711 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5712 return self._parse_select() or self._parse_set_operations( 5713 self._parse_expression() if alias else self._parse_conjunction() 5714 ) 5715 5716 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5717 return self._parse_query_modifiers( 5718 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5719 ) 5720 5721 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5722 this = None 5723 if self._match_texts(self.TRANSACTION_KIND): 5724 this = self._prev.text 5725 5726 self._match_texts(("TRANSACTION", "WORK")) 5727 5728 modes = [] 5729 while True: 5730 mode = [] 5731 while self._match(TokenType.VAR): 5732 mode.append(self._prev.text) 5733 5734 if mode: 5735 modes.append(" ".join(mode)) 5736 if not self._match(TokenType.COMMA): 5737 break 5738 5739 return self.expression(exp.Transaction, this=this, modes=modes) 5740 5741 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5742 chain = None 5743 savepoint = None 5744 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5745 5746 
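        # Illustrative note (not from the source): "ROLLBACK TO SAVEPOINT sp" produces
        # exp.Rollback(savepoint=sp), while "COMMIT AND NO CHAIN" produces
        # exp.Commit(chain=False); the chain flag is parsed but dropped for rollbacks.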
self._match_texts(("TRANSACTION", "WORK")) 5747 5748 if self._match_text_seq("TO"): 5749 self._match_text_seq("SAVEPOINT") 5750 savepoint = self._parse_id_var() 5751 5752 if self._match(TokenType.AND): 5753 chain = not self._match_text_seq("NO") 5754 self._match_text_seq("CHAIN") 5755 5756 if is_rollback: 5757 return self.expression(exp.Rollback, savepoint=savepoint) 5758 5759 return self.expression(exp.Commit, chain=chain) 5760 5761 def _parse_refresh(self) -> exp.Refresh: 5762 self._match(TokenType.TABLE) 5763 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5764 5765 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5766 if not self._match_text_seq("ADD"): 5767 return None 5768 5769 self._match(TokenType.COLUMN) 5770 exists_column = self._parse_exists(not_=True) 5771 expression = self._parse_field_def() 5772 5773 if expression: 5774 expression.set("exists", exists_column) 5775 5776 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5777 if self._match_texts(("FIRST", "AFTER")): 5778 position = self._prev.text 5779 column_position = self.expression( 5780 exp.ColumnPosition, this=self._parse_column(), position=position 5781 ) 5782 expression.set("position", column_position) 5783 5784 return expression 5785 5786 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5787 drop = self._match(TokenType.DROP) and self._parse_drop() 5788 if drop and not isinstance(drop, exp.Command): 5789 drop.set("kind", drop.args.get("kind", "COLUMN")) 5790 return drop 5791 5792 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5793 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5794 return self.expression( 5795 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5796 ) 5797 5798 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5799 index = self._index - 1 5800 5801 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5802 return self._parse_csv( 5803 lambda: self.expression( 5804 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5805 ) 5806 ) 5807 5808 self._retreat(index) 5809 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5810 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5811 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5812 5813 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5814 self._match(TokenType.COLUMN) 5815 column = self._parse_field(any_token=True) 5816 5817 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5818 return self.expression(exp.AlterColumn, this=column, drop=True) 5819 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5820 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5821 if self._match(TokenType.COMMENT): 5822 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5823 5824 self._match_text_seq("SET", "DATA") 5825 self._match_text_seq("TYPE") 5826 return self.expression( 5827 exp.AlterColumn, 5828 this=column, 5829 dtype=self._parse_types(), 5830 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5831 using=self._match(TokenType.USING) and self._parse_conjunction(), 5832 ) 5833 5834 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5835 index = self._index - 1 5836 5837 partition_exists = self._parse_exists() 5838 if 
self._match(TokenType.PARTITION, advance=False): 5839 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5840 5841 self._retreat(index) 5842 return self._parse_csv(self._parse_drop_column) 5843 5844 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5845 if self._match(TokenType.COLUMN): 5846 exists = self._parse_exists() 5847 old_column = self._parse_column() 5848 to = self._match_text_seq("TO") 5849 new_column = self._parse_column() 5850 5851 if old_column is None or to is None or new_column is None: 5852 return None 5853 5854 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5855 5856 self._match_text_seq("TO") 5857 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5858 5859 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5860 start = self._prev 5861 5862 if not self._match(TokenType.TABLE): 5863 return self._parse_as_command(start) 5864 5865 exists = self._parse_exists() 5866 only = self._match_text_seq("ONLY") 5867 this = self._parse_table(schema=True) 5868 5869 if self._next: 5870 self._advance() 5871 5872 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5873 if parser: 5874 actions = ensure_list(parser(self)) 5875 options = self._parse_csv(self._parse_property) 5876 5877 if not self._curr and actions: 5878 return self.expression( 5879 exp.AlterTable, 5880 this=this, 5881 exists=exists, 5882 actions=actions, 5883 only=only, 5884 options=options, 5885 ) 5886 5887 return self._parse_as_command(start) 5888 5889 def _parse_merge(self) -> exp.Merge: 5890 self._match(TokenType.INTO) 5891 target = self._parse_table() 5892 5893 if target and self._match(TokenType.ALIAS, advance=False): 5894 target.set("alias", self._parse_table_alias()) 5895 5896 self._match(TokenType.USING) 5897 using = self._parse_table() 5898 5899 self._match(TokenType.ON) 5900 on = self._parse_conjunction() 5901 5902 return self.expression( 5903 exp.Merge, 5904 this=target, 5905 using=using, 5906 on=on, 5907 expressions=self._parse_when_matched(), 5908 ) 5909 5910 def _parse_when_matched(self) -> t.List[exp.When]: 5911 whens = [] 5912 5913 while self._match(TokenType.WHEN): 5914 matched = not self._match(TokenType.NOT) 5915 self._match_text_seq("MATCHED") 5916 source = ( 5917 False 5918 if self._match_text_seq("BY", "TARGET") 5919 else self._match_text_seq("BY", "SOURCE") 5920 ) 5921 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5922 5923 self._match(TokenType.THEN) 5924 5925 if self._match(TokenType.INSERT): 5926 _this = self._parse_star() 5927 if _this: 5928 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5929 else: 5930 then = self.expression( 5931 exp.Insert, 5932 this=self._parse_value(), 5933 expression=self._match_text_seq("VALUES") and self._parse_value(), 5934 ) 5935 elif self._match(TokenType.UPDATE): 5936 expressions = self._parse_star() 5937 if expressions: 5938 then = self.expression(exp.Update, expressions=expressions) 5939 else: 5940 then = self.expression( 5941 exp.Update, 5942 expressions=self._match(TokenType.SET) 5943 and self._parse_csv(self._parse_equality), 5944 ) 5945 elif self._match(TokenType.DELETE): 5946 then = self.expression(exp.Var, this=self._prev.text) 5947 else: 5948 then = None 5949 5950 whens.append( 5951 self.expression( 5952 exp.When, 5953 matched=matched, 5954 source=source, 5955 condition=condition, 5956 then=then, 5957 ) 5958 ) 5959 return whens 5960 
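    # Illustrative sketch (not from the source): each WHEN clause becomes an exp.When,
    # so, assuming the usual top-level API,
    #   sqlglot.parse_one(
    #       "MERGE INTO t USING s ON t.id = s.id "
    #       "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #       "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   )
    # should return an exp.Merge whose "expressions" arg holds two exp.When nodes.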
5961 def _parse_show(self) -> t.Optional[exp.Expression]: 5962 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5963 if parser: 5964 return parser(self) 5965 return self._parse_as_command(self._prev) 5966 5967 def _parse_set_item_assignment( 5968 self, kind: t.Optional[str] = None 5969 ) -> t.Optional[exp.Expression]: 5970 index = self._index 5971 5972 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5973 return self._parse_set_transaction(global_=kind == "GLOBAL") 5974 5975 left = self._parse_primary() or self._parse_id_var() 5976 assignment_delimiter = self._match_texts(("=", "TO")) 5977 5978 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5979 self._retreat(index) 5980 return None 5981 5982 right = self._parse_statement() or self._parse_id_var() 5983 this = self.expression(exp.EQ, this=left, expression=right) 5984 5985 return self.expression(exp.SetItem, this=this, kind=kind) 5986 5987 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5988 self._match_text_seq("TRANSACTION") 5989 characteristics = self._parse_csv( 5990 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5991 ) 5992 return self.expression( 5993 exp.SetItem, 5994 expressions=characteristics, 5995 kind="TRANSACTION", 5996 **{"global": global_}, # type: ignore 5997 ) 5998 5999 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6000 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6001 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6002 6003 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6004 index = self._index 6005 set_ = self.expression( 6006 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6007 ) 6008 6009 if self._curr: 6010 self._retreat(index) 6011 return self._parse_as_command(self._prev) 6012 6013 return set_ 6014 6015 def _parse_var_from_options( 6016 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6017 ) -> t.Optional[exp.Var]: 6018 start = self._curr 6019 if not start: 6020 return None 6021 6022 option = start.text.upper() 6023 continuations = options.get(option) 6024 6025 index = self._index 6026 self._advance() 6027 for keywords in continuations or []: 6028 if isinstance(keywords, str): 6029 keywords = (keywords,) 6030 6031 if self._match_text_seq(*keywords): 6032 option = f"{option} {' '.join(keywords)}" 6033 break 6034 else: 6035 if continuations or continuations is None: 6036 if raise_unmatched: 6037 self.raise_error(f"Unknown option {option}") 6038 6039 self._retreat(index) 6040 return None 6041 6042 return exp.var(option) 6043 6044 def _parse_as_command(self, start: Token) -> exp.Command: 6045 while self._curr: 6046 self._advance() 6047 text = self._find_sql(start, self._prev) 6048 size = len(start.text) 6049 self._warn_unsupported() 6050 return exp.Command(this=text[:size], expression=text[size:]) 6051 6052 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6053 settings = [] 6054 6055 self._match_l_paren() 6056 kind = self._parse_id_var() 6057 6058 if self._match(TokenType.L_PAREN): 6059 while True: 6060 key = self._parse_id_var() 6061 value = self._parse_primary() 6062 6063 if not key and value is None: 6064 break 6065 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6066 self._match(TokenType.R_PAREN) 6067 6068 self._match_r_paren() 6069 6070 return self.expression( 6071 exp.DictProperty, 6072 this=this, 6073 
kind=kind.this if kind else None, 6074 settings=settings, 6075 ) 6076 6077 def _parse_dict_range(self, this: str) -> exp.DictRange: 6078 self._match_l_paren() 6079 has_min = self._match_text_seq("MIN") 6080 if has_min: 6081 min = self._parse_var() or self._parse_primary() 6082 self._match_text_seq("MAX") 6083 max = self._parse_var() or self._parse_primary() 6084 else: 6085 max = self._parse_var() or self._parse_primary() 6086 min = exp.Literal.number(0) 6087 self._match_r_paren() 6088 return self.expression(exp.DictRange, this=this, min=min, max=max) 6089 6090 def _parse_comprehension( 6091 self, this: t.Optional[exp.Expression] 6092 ) -> t.Optional[exp.Comprehension]: 6093 index = self._index 6094 expression = self._parse_column() 6095 if not self._match(TokenType.IN): 6096 self._retreat(index - 1) 6097 return None 6098 iterator = self._parse_column() 6099 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6100 return self.expression( 6101 exp.Comprehension, 6102 this=this, 6103 expression=expression, 6104 iterator=iterator, 6105 condition=condition, 6106 ) 6107 6108 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6109 if self._match(TokenType.HEREDOC_STRING): 6110 return self.expression(exp.Heredoc, this=self._prev.text) 6111 6112 if not self._match_text_seq("$"): 6113 return None 6114 6115 tags = ["$"] 6116 tag_text = None 6117 6118 if self._is_connected(): 6119 self._advance() 6120 tags.append(self._prev.text.upper()) 6121 else: 6122 self.raise_error("No closing $ found") 6123 6124 if tags[-1] != "$": 6125 if self._is_connected() and self._match_text_seq("$"): 6126 tag_text = tags[-1] 6127 tags.append("$") 6128 else: 6129 self.raise_error("No closing $ found") 6130 6131 heredoc_start = self._curr 6132 6133 while self._curr: 6134 if self._match_text_seq(*tags, advance=False): 6135 this = self._find_sql(heredoc_start, self._prev) 6136 self._advance(len(tags)) 6137 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6138 6139 self._advance() 6140 6141 self.raise_error(f"No closing {''.join(tags)} found") 6142 return None 6143 6144 def _find_parser( 6145 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6146 ) -> t.Optional[t.Callable]: 6147 if not self._curr: 6148 return None 6149 6150 index = self._index 6151 this = [] 6152 while True: 6153 # The current token might be multiple words 6154 curr = self._curr.text.upper() 6155 key = curr.split(" ") 6156 this.append(curr) 6157 6158 self._advance() 6159 result, trie = in_trie(trie, key) 6160 if result == TrieResult.FAILED: 6161 break 6162 6163 if result == TrieResult.EXISTS: 6164 subparser = parsers[" ".join(this)] 6165 return subparser 6166 6167 self._retreat(index) 6168 return None 6169 6170 def _match(self, token_type, advance=True, expression=None): 6171 if not self._curr: 6172 return None 6173 6174 if self._curr.token_type == token_type: 6175 if advance: 6176 self._advance() 6177 self._add_comments(expression) 6178 return True 6179 6180 return None 6181 6182 def _match_set(self, types, advance=True): 6183 if not self._curr: 6184 return None 6185 6186 if self._curr.token_type in types: 6187 if advance: 6188 self._advance() 6189 return True 6190 6191 return None 6192 6193 def _match_pair(self, token_type_a, token_type_b, advance=True): 6194 if not self._curr or not self._next: 6195 return None 6196 6197 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6198 if advance: 6199 self._advance(2) 6200 return True 6201 6202 return None 6203 6204 def _match_l_paren(self, 
expression: t.Optional[exp.Expression] = None) -> None: 6205 if not self._match(TokenType.L_PAREN, expression=expression): 6206 self.raise_error("Expecting (") 6207 6208 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6209 if not self._match(TokenType.R_PAREN, expression=expression): 6210 self.raise_error("Expecting )") 6211 6212 def _match_texts(self, texts, advance=True): 6213 if self._curr and self._curr.text.upper() in texts: 6214 if advance: 6215 self._advance() 6216 return True 6217 return None 6218 6219 def _match_text_seq(self, *texts, advance=True): 6220 index = self._index 6221 for text in texts: 6222 if self._curr and self._curr.text.upper() == text: 6223 self._advance() 6224 else: 6225 self._retreat(index) 6226 return None 6227 6228 if not advance: 6229 self._retreat(index) 6230 6231 return True 6232 6233 def _replace_lambda( 6234 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6235 ) -> t.Optional[exp.Expression]: 6236 if not node: 6237 return node 6238 6239 for column in node.find_all(exp.Column): 6240 if column.parts[0].name in lambda_variables: 6241 dot_or_id = column.to_dot() if column.table else column.this 6242 parent = column.parent 6243 6244 while isinstance(parent, exp.Dot): 6245 if not isinstance(parent.parent, exp.Dot): 6246 parent.replace(dot_or_id) 6247 break 6248 parent = parent.parent 6249 else: 6250 if column is node: 6251 node = dot_or_id 6252 else: 6253 column.replace(dot_or_id) 6254 return node 6255 6256 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6257 start = self._prev 6258 6259 # Not to be confused with TRUNCATE(number, decimals) function call 6260 if self._match(TokenType.L_PAREN): 6261 self._retreat(self._index - 2) 6262 return self._parse_function() 6263 6264 # Clickhouse supports TRUNCATE DATABASE as well 6265 is_database = self._match(TokenType.DATABASE) 6266 6267 self._match(TokenType.TABLE) 6268 6269 exists = self._parse_exists(not_=False) 6270 6271 expressions = self._parse_csv( 6272 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6273 ) 6274 6275 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6276 6277 if self._match_text_seq("RESTART", "IDENTITY"): 6278 identity = "RESTART" 6279 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6280 identity = "CONTINUE" 6281 else: 6282 identity = None 6283 6284 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6285 option = self._prev.text 6286 else: 6287 option = None 6288 6289 partition = self._parse_partition() 6290 6291 # Fallback case 6292 if self._curr: 6293 return self._parse_as_command(start) 6294 6295 return self.expression( 6296 exp.TruncateTable, 6297 expressions=expressions, 6298 is_database=is_database, 6299 exists=exists, 6300 cluster=cluster, 6301 identity=identity, 6302 option=option, 6303 partition=partition, 6304 ) 6305 6306 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6307 this = self._parse_ordered(self._parse_opclass) 6308 6309 if not self._match(TokenType.WITH): 6310 return this 6311 6312 op = self._parse_var(any_token=True) 6313 6314 return self.expression(exp.WithOperator, this=this, op=op) 6315 6316 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6317 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6318 6319 options = [] 6320 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6321 option = self._parse_unquoted_field() 6322 value = None 6323 # Some 
options are defined as functions with the values as params 6324 if not isinstance(option, exp.Func): 6325 # Different dialects might separate options and values by white space, "=" and "AS" 6326 self._match(TokenType.EQ) 6327 self._match(TokenType.ALIAS) 6328 value = self._parse_unquoted_field() 6329 6330 param = self.expression(exp.CopyParameter, this=option, expression=value) 6331 options.append(param) 6332 6333 if sep: 6334 self._match(sep) 6335 6336 return options 6337 6338 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6339 def parse_options(): 6340 opts = [] 6341 self._match(TokenType.EQ) 6342 self._match(TokenType.L_PAREN) 6343 while self._curr and not self._match(TokenType.R_PAREN): 6344 opts.append(self._parse_conjunction()) 6345 return opts 6346 6347 expr = self.expression(exp.Credentials) 6348 6349 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6350 expr.set("storage", self._parse_conjunction()) 6351 if self._match_text_seq("CREDENTIALS"): 6352 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6353 creds = parse_options() if self._match(TokenType.EQ) else self._parse_field() 6354 expr.set("credentials", creds) 6355 if self._match_text_seq("ENCRYPTION"): 6356 expr.set("encryption", parse_options()) 6357 if self._match_text_seq("IAM_ROLE"): 6358 expr.set("iam_role", self._parse_field()) 6359 if self._match_text_seq("REGION"): 6360 expr.set("region", self._parse_field()) 6361 6362 return expr 6363 6364 def _parse_copy(self): 6365 start = self._prev 6366 6367 self._match(TokenType.INTO) 6368 6369 this = ( 6370 self._parse_conjunction() 6371 if self._match(TokenType.L_PAREN, advance=False) 6372 else self._parse_table(schema=True) 6373 ) 6374 6375 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6376 6377 files = self._parse_csv(self._parse_conjunction) 6378 credentials = self._parse_credentials() 6379 6380 self._match_text_seq("WITH") 6381 6382 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6383 6384 # Fallback case 6385 if self._curr: 6386 return self._parse_as_command(start) 6387 6388 return self.expression( 6389 exp.Copy, 6390 this=this, 6391 kind=kind, 6392 credentials=credentials, 6393 files=files, 6394 params=params, 6395 )
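# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the module): the Parser is normally
# driven through the public API / a Dialect rather than instantiated by hand.
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT a FROM t WHERE b > 1")
#     assert isinstance(ast, exp.Select)
#
#     # Dialect-specific parsing routes through that dialect's Parser subclass:
#     sqlglot.parse_one("SELECT TOP 5 * FROM t", read="tsql")
# ---------------------------------------------------------------------------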
204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.TIMESTAMPNTZ, 222 TokenType.DATETIME, 223 TokenType.DATETIME64, 224 TokenType.DATE, 225 TokenType.DATE32, 226 TokenType.INT4RANGE, 227 TokenType.INT4MULTIRANGE, 228 TokenType.INT8RANGE, 229 TokenType.INT8MULTIRANGE, 230 TokenType.NUMRANGE, 231 TokenType.NUMMULTIRANGE, 232 TokenType.TSRANGE, 233 TokenType.TSMULTIRANGE, 234 TokenType.TSTZRANGE, 235 TokenType.TSTZMULTIRANGE, 236 TokenType.DATERANGE, 237 TokenType.DATEMULTIRANGE, 238 TokenType.DECIMAL, 239 TokenType.UDECIMAL, 240 TokenType.BIGDECIMAL, 241 TokenType.UUID, 242 TokenType.GEOGRAPHY, 243 TokenType.GEOMETRY, 244 TokenType.HLLSKETCH, 245 TokenType.HSTORE, 246 TokenType.PSEUDO_TYPE, 247 TokenType.SUPER, 248 TokenType.SERIAL, 249 TokenType.SMALLSERIAL, 250 TokenType.BIGSERIAL, 251 TokenType.XML, 252 TokenType.YEAR, 253 TokenType.UNIQUEIDENTIFIER, 254 TokenType.USERDEFINED, 255 TokenType.MONEY, 256 TokenType.SMALLMONEY, 257 TokenType.ROWVERSION, 258 TokenType.IMAGE, 259 TokenType.VARIANT, 260 TokenType.OBJECT, 261 TokenType.OBJECT_IDENTIFIER, 262 TokenType.INET, 263 TokenType.IPADDRESS, 264 TokenType.IPPREFIX, 265 TokenType.IPV4, 266 TokenType.IPV6, 267 TokenType.UNKNOWN, 268 TokenType.NULL, 269 TokenType.NAME, 270 TokenType.TDIGEST, 271 *ENUM_TYPE_TOKENS, 272 *NESTED_TYPE_TOKENS, 273 *AGGREGATE_TYPE_TOKENS, 274 } 275 276 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 277 TokenType.BIGINT: TokenType.UBIGINT, 278 TokenType.INT: TokenType.UINT, 279 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 280 TokenType.SMALLINT: TokenType.USMALLINT, 281 TokenType.TINYINT: TokenType.UTINYINT, 282 TokenType.DECIMAL: TokenType.UDECIMAL, 283 } 284 285 SUBQUERY_PREDICATES = { 286 TokenType.ANY: exp.Any, 287 TokenType.ALL: exp.All, 288 TokenType.EXISTS: exp.Exists, 289 TokenType.SOME: exp.Any, 290 } 291 292 RESERVED_TOKENS = { 293 *Tokenizer.SINGLE_TOKENS.values(), 294 TokenType.SELECT, 295 } - {TokenType.IDENTIFIER} 296 297 DB_CREATABLES = { 298 TokenType.DATABASE, 299 TokenType.SCHEMA, 300 TokenType.TABLE, 301 TokenType.VIEW, 302 TokenType.MODEL, 303 TokenType.DICTIONARY, 304 TokenType.SEQUENCE, 305 TokenType.STORAGE_INTEGRATION, 306 } 307 308 CREATABLES = { 309 TokenType.COLUMN, 310 TokenType.CONSTRAINT, 311 TokenType.FUNCTION, 312 TokenType.INDEX, 313 TokenType.PROCEDURE, 314 TokenType.FOREIGN_KEY, 315 *DB_CREATABLES, 316 } 317 318 # Tokens that can represent identifiers 319 ID_VAR_TOKENS = { 320 TokenType.VAR, 321 TokenType.ANTI, 322 TokenType.APPLY, 323 TokenType.ASC, 324 TokenType.ASOF, 325 TokenType.AUTO_INCREMENT, 326 TokenType.BEGIN, 327 TokenType.BPCHAR, 328 TokenType.CACHE, 329 TokenType.CASE, 330 TokenType.COLLATE, 331 TokenType.COMMAND, 332 TokenType.COMMENT, 333 TokenType.COMMIT, 334 TokenType.CONSTRAINT, 335 TokenType.COPY, 336 TokenType.DEFAULT, 337 TokenType.DELETE, 338 TokenType.DESC, 339 TokenType.DESCRIBE, 340 TokenType.DICTIONARY, 341 TokenType.DIV, 342 TokenType.END, 343 TokenType.EXECUTE, 344 TokenType.ESCAPE, 345 TokenType.FALSE, 346 TokenType.FIRST, 347 TokenType.FILTER, 348 TokenType.FINAL, 349 TokenType.FORMAT, 350 TokenType.FULL, 351 TokenType.IDENTIFIER, 352 TokenType.IS, 353 TokenType.ISNULL, 354 TokenType.INTERVAL, 355 
TokenType.KEEP, 356 TokenType.KILL, 357 TokenType.LEFT, 358 TokenType.LOAD, 359 TokenType.MERGE, 360 TokenType.NATURAL, 361 TokenType.NEXT, 362 TokenType.OFFSET, 363 TokenType.OPERATOR, 364 TokenType.ORDINALITY, 365 TokenType.OVERLAPS, 366 TokenType.OVERWRITE, 367 TokenType.PARTITION, 368 TokenType.PERCENT, 369 TokenType.PIVOT, 370 TokenType.PRAGMA, 371 TokenType.RANGE, 372 TokenType.RECURSIVE, 373 TokenType.REFERENCES, 374 TokenType.REFRESH, 375 TokenType.REPLACE, 376 TokenType.RIGHT, 377 TokenType.ROW, 378 TokenType.ROWS, 379 TokenType.SEMI, 380 TokenType.SET, 381 TokenType.SETTINGS, 382 TokenType.SHOW, 383 TokenType.TEMPORARY, 384 TokenType.TOP, 385 TokenType.TRUE, 386 TokenType.TRUNCATE, 387 TokenType.UNIQUE, 388 TokenType.UNPIVOT, 389 TokenType.UPDATE, 390 TokenType.USE, 391 TokenType.VOLATILE, 392 TokenType.WINDOW, 393 *CREATABLES, 394 *SUBQUERY_PREDICATES, 395 *TYPE_TOKENS, 396 *NO_PAREN_FUNCTIONS, 397 } 398 399 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 400 401 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 402 TokenType.ANTI, 403 TokenType.APPLY, 404 TokenType.ASOF, 405 TokenType.FULL, 406 TokenType.LEFT, 407 TokenType.LOCK, 408 TokenType.NATURAL, 409 TokenType.OFFSET, 410 TokenType.RIGHT, 411 TokenType.SEMI, 412 TokenType.WINDOW, 413 } 414 415 ALIAS_TOKENS = ID_VAR_TOKENS 416 417 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 418 419 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 420 421 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 422 423 FUNC_TOKENS = { 424 TokenType.COLLATE, 425 TokenType.COMMAND, 426 TokenType.CURRENT_DATE, 427 TokenType.CURRENT_DATETIME, 428 TokenType.CURRENT_TIMESTAMP, 429 TokenType.CURRENT_TIME, 430 TokenType.CURRENT_USER, 431 TokenType.FILTER, 432 TokenType.FIRST, 433 TokenType.FORMAT, 434 TokenType.GLOB, 435 TokenType.IDENTIFIER, 436 TokenType.INDEX, 437 TokenType.ISNULL, 438 TokenType.ILIKE, 439 TokenType.INSERT, 440 TokenType.LIKE, 441 TokenType.MERGE, 442 TokenType.OFFSET, 443 TokenType.PRIMARY_KEY, 444 TokenType.RANGE, 445 TokenType.REPLACE, 446 TokenType.RLIKE, 447 TokenType.ROW, 448 TokenType.UNNEST, 449 TokenType.VAR, 450 TokenType.LEFT, 451 TokenType.RIGHT, 452 TokenType.SEQUENCE, 453 TokenType.DATE, 454 TokenType.DATETIME, 455 TokenType.TABLE, 456 TokenType.TIMESTAMP, 457 TokenType.TIMESTAMPTZ, 458 TokenType.TRUNCATE, 459 TokenType.WINDOW, 460 TokenType.XOR, 461 *TYPE_TOKENS, 462 *SUBQUERY_PREDICATES, 463 } 464 465 CONJUNCTION = { 466 TokenType.AND: exp.And, 467 TokenType.OR: exp.Or, 468 } 469 470 EQUALITY = { 471 TokenType.COLON_EQ: exp.PropertyEQ, 472 TokenType.EQ: exp.EQ, 473 TokenType.NEQ: exp.NEQ, 474 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 475 } 476 477 COMPARISON = { 478 TokenType.GT: exp.GT, 479 TokenType.GTE: exp.GTE, 480 TokenType.LT: exp.LT, 481 TokenType.LTE: exp.LTE, 482 } 483 484 BITWISE = { 485 TokenType.AMP: exp.BitwiseAnd, 486 TokenType.CARET: exp.BitwiseXor, 487 TokenType.PIPE: exp.BitwiseOr, 488 } 489 490 TERM = { 491 TokenType.DASH: exp.Sub, 492 TokenType.PLUS: exp.Add, 493 TokenType.MOD: exp.Mod, 494 TokenType.COLLATE: exp.Collate, 495 } 496 497 FACTOR = { 498 TokenType.DIV: exp.IntDiv, 499 TokenType.LR_ARROW: exp.Distance, 500 TokenType.SLASH: exp.Div, 501 TokenType.STAR: exp.Mul, 502 } 503 504 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 505 506 TIMES = { 507 TokenType.TIME, 508 TokenType.TIMETZ, 509 } 510 511 TIMESTAMPS = { 512 TokenType.TIMESTAMP, 513 TokenType.TIMESTAMPTZ, 514 TokenType.TIMESTAMPLTZ, 515 *TIMES, 516 } 517 518 SET_OPERATIONS = { 519 TokenType.UNION, 520 
TokenType.INTERSECT, 521 TokenType.EXCEPT, 522 } 523 524 JOIN_METHODS = { 525 TokenType.ASOF, 526 TokenType.NATURAL, 527 TokenType.POSITIONAL, 528 } 529 530 JOIN_SIDES = { 531 TokenType.LEFT, 532 TokenType.RIGHT, 533 TokenType.FULL, 534 } 535 536 JOIN_KINDS = { 537 TokenType.INNER, 538 TokenType.OUTER, 539 TokenType.CROSS, 540 TokenType.SEMI, 541 TokenType.ANTI, 542 } 543 544 JOIN_HINTS: t.Set[str] = set() 545 546 LAMBDAS = { 547 TokenType.ARROW: lambda self, expressions: self.expression( 548 exp.Lambda, 549 this=self._replace_lambda( 550 self._parse_conjunction(), 551 {node.name for node in expressions}, 552 ), 553 expressions=expressions, 554 ), 555 TokenType.FARROW: lambda self, expressions: self.expression( 556 exp.Kwarg, 557 this=exp.var(expressions[0].name), 558 expression=self._parse_conjunction(), 559 ), 560 } 561 562 COLUMN_OPERATORS = { 563 TokenType.DOT: None, 564 TokenType.DCOLON: lambda self, this, to: self.expression( 565 exp.Cast if self.STRICT_CAST else exp.TryCast, 566 this=this, 567 to=to, 568 ), 569 TokenType.ARROW: lambda self, this, path: self.expression( 570 exp.JSONExtract, 571 this=this, 572 expression=self.dialect.to_json_path(path), 573 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 574 ), 575 TokenType.DARROW: lambda self, this, path: self.expression( 576 exp.JSONExtractScalar, 577 this=this, 578 expression=self.dialect.to_json_path(path), 579 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 580 ), 581 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 582 exp.JSONBExtract, 583 this=this, 584 expression=path, 585 ), 586 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 587 exp.JSONBExtractScalar, 588 this=this, 589 expression=path, 590 ), 591 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 592 exp.JSONBContains, 593 this=this, 594 expression=key, 595 ), 596 } 597 598 EXPRESSION_PARSERS = { 599 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 600 exp.Column: lambda self: self._parse_column(), 601 exp.Condition: lambda self: self._parse_conjunction(), 602 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 603 exp.Expression: lambda self: self._parse_expression(), 604 exp.From: lambda self: self._parse_from(joins=True), 605 exp.Group: lambda self: self._parse_group(), 606 exp.Having: lambda self: self._parse_having(), 607 exp.Identifier: lambda self: self._parse_id_var(), 608 exp.Join: lambda self: self._parse_join(), 609 exp.Lambda: lambda self: self._parse_lambda(), 610 exp.Lateral: lambda self: self._parse_lateral(), 611 exp.Limit: lambda self: self._parse_limit(), 612 exp.Offset: lambda self: self._parse_offset(), 613 exp.Order: lambda self: self._parse_order(), 614 exp.Ordered: lambda self: self._parse_ordered(), 615 exp.Properties: lambda self: self._parse_properties(), 616 exp.Qualify: lambda self: self._parse_qualify(), 617 exp.Returning: lambda self: self._parse_returning(), 618 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 619 exp.Table: lambda self: self._parse_table_parts(), 620 exp.TableAlias: lambda self: self._parse_table_alias(), 621 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 622 exp.Where: lambda self: self._parse_where(), 623 exp.Window: lambda self: self._parse_named_window(), 624 exp.With: lambda self: self._parse_with(), 625 "JOIN_TYPE": lambda self: self._parse_join_parts(), 626 } 627 628 STATEMENT_PARSERS = { 629 TokenType.ALTER: lambda self: self._parse_alter(), 630 TokenType.BEGIN: lambda self: 
self._parse_transaction(), 631 TokenType.CACHE: lambda self: self._parse_cache(), 632 TokenType.COMMENT: lambda self: self._parse_comment(), 633 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 634 TokenType.COPY: lambda self: self._parse_copy(), 635 TokenType.CREATE: lambda self: self._parse_create(), 636 TokenType.DELETE: lambda self: self._parse_delete(), 637 TokenType.DESC: lambda self: self._parse_describe(), 638 TokenType.DESCRIBE: lambda self: self._parse_describe(), 639 TokenType.DROP: lambda self: self._parse_drop(), 640 TokenType.INSERT: lambda self: self._parse_insert(), 641 TokenType.KILL: lambda self: self._parse_kill(), 642 TokenType.LOAD: lambda self: self._parse_load(), 643 TokenType.MERGE: lambda self: self._parse_merge(), 644 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 645 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 646 TokenType.REFRESH: lambda self: self._parse_refresh(), 647 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 648 TokenType.SET: lambda self: self._parse_set(), 649 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 650 TokenType.UNCACHE: lambda self: self._parse_uncache(), 651 TokenType.UPDATE: lambda self: self._parse_update(), 652 TokenType.USE: lambda self: self.expression( 653 exp.Use, 654 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 655 this=self._parse_table(schema=False), 656 ), 657 } 658 659 UNARY_PARSERS = { 660 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 661 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 662 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 663 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 664 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 665 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 666 } 667 668 STRING_PARSERS = { 669 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 670 exp.RawString, this=token.text 671 ), 672 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 673 exp.National, this=token.text 674 ), 675 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 676 TokenType.STRING: lambda self, token: self.expression( 677 exp.Literal, this=token.text, is_string=True 678 ), 679 TokenType.UNICODE_STRING: lambda self, token: self.expression( 680 exp.UnicodeString, 681 this=token.text, 682 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 683 ), 684 } 685 686 NUMERIC_PARSERS = { 687 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 688 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 689 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 690 TokenType.NUMBER: lambda self, token: self.expression( 691 exp.Literal, this=token.text, is_string=False 692 ), 693 } 694 695 PRIMARY_PARSERS = { 696 **STRING_PARSERS, 697 **NUMERIC_PARSERS, 698 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 699 TokenType.NULL: lambda self, _: self.expression(exp.Null), 700 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 701 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 702 TokenType.SESSION_PARAMETER: lambda self, _: 
self._parse_session_parameter(), 703 TokenType.STAR: lambda self, _: self.expression( 704 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 705 ), 706 } 707 708 PLACEHOLDER_PARSERS = { 709 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 710 TokenType.PARAMETER: lambda self: self._parse_parameter(), 711 TokenType.COLON: lambda self: ( 712 self.expression(exp.Placeholder, this=self._prev.text) 713 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 714 else None 715 ), 716 } 717 718 RANGE_PARSERS = { 719 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 720 TokenType.GLOB: binary_range_parser(exp.Glob), 721 TokenType.ILIKE: binary_range_parser(exp.ILike), 722 TokenType.IN: lambda self, this: self._parse_in(this), 723 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 724 TokenType.IS: lambda self, this: self._parse_is(this), 725 TokenType.LIKE: binary_range_parser(exp.Like), 726 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 727 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 728 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 729 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 730 } 731 732 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 733 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 734 "AUTO": lambda self: self._parse_auto_property(), 735 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 736 "BACKUP": lambda self: self.expression( 737 exp.BackupProperty, this=self._parse_var(any_token=True) 738 ), 739 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 740 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 741 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 742 "CHECKSUM": lambda self: self._parse_checksum(), 743 "CLUSTER BY": lambda self: self._parse_cluster(), 744 "CLUSTERED": lambda self: self._parse_clustered_by(), 745 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 746 exp.CollateProperty, **kwargs 747 ), 748 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 749 "CONTAINS": lambda self: self._parse_contains_property(), 750 "COPY": lambda self: self._parse_copy_property(), 751 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 752 "DEFINER": lambda self: self._parse_definer(), 753 "DETERMINISTIC": lambda self: self.expression( 754 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 755 ), 756 "DISTKEY": lambda self: self._parse_distkey(), 757 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 758 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 759 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 760 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 761 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 762 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 763 "FREESPACE": lambda self: self._parse_freespace(), 764 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 765 "HEAP": lambda self: self.expression(exp.HeapProperty), 766 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 767 "IMMUTABLE": lambda self: self.expression( 768 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 769 ), 770 "INHERITS": lambda self: self.expression( 771 exp.InheritsProperty, 
expressions=self._parse_wrapped_csv(self._parse_table) 772 ), 773 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 774 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 775 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 776 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 777 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 778 "LIKE": lambda self: self._parse_create_like(), 779 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 780 "LOCK": lambda self: self._parse_locking(), 781 "LOCKING": lambda self: self._parse_locking(), 782 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 783 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 784 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 785 "MODIFIES": lambda self: self._parse_modifies_property(), 786 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 787 "NO": lambda self: self._parse_no_property(), 788 "ON": lambda self: self._parse_on_property(), 789 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 790 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 791 "PARTITION": lambda self: self._parse_partitioned_of(), 792 "PARTITION BY": lambda self: self._parse_partitioned_by(), 793 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 794 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 795 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 796 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 797 "READS": lambda self: self._parse_reads_property(), 798 "REMOTE": lambda self: self._parse_remote_with_connection(), 799 "RETURNS": lambda self: self._parse_returns(), 800 "ROW": lambda self: self._parse_row(), 801 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 802 "SAMPLE": lambda self: self.expression( 803 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 804 ), 805 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 806 "SETTINGS": lambda self: self.expression( 807 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 808 ), 809 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 810 "SORTKEY": lambda self: self._parse_sortkey(), 811 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 812 "STABLE": lambda self: self.expression( 813 exp.StabilityProperty, this=exp.Literal.string("STABLE") 814 ), 815 "STORED": lambda self: self._parse_stored(), 816 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 817 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 818 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 819 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 820 "TO": lambda self: self._parse_to_table(), 821 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 822 "TRANSFORM": lambda self: self.expression( 823 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 824 ), 825 "TTL": lambda self: self._parse_ttl(), 826 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 827 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 828 "VOLATILE": lambda self: self._parse_volatile_property(), 829 "WITH": lambda self: self._parse_with_property(), 
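# A hedged usage sketch (illustrative; parse_one is the top-level sqlglot
# helper, not an entry in this table): property keywords seen while parsing
# DDL are dispatched through PROPERTY_PARSERS by _parse_property, e.g.:
#
#   >>> from sqlglot import parse_one
#   >>> ast = parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
#   >>> # "ENGINE" matches the entry above, so ast.args["properties"] should
#   >>> # contain an exp.EngineProperty node.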
830 } 831 832 CONSTRAINT_PARSERS = { 833 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 834 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 835 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 836 "CHARACTER SET": lambda self: self.expression( 837 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 838 ), 839 "CHECK": lambda self: self.expression( 840 exp.CheckColumnConstraint, 841 this=self._parse_wrapped(self._parse_conjunction), 842 enforced=self._match_text_seq("ENFORCED"), 843 ), 844 "COLLATE": lambda self: self.expression( 845 exp.CollateColumnConstraint, this=self._parse_var() 846 ), 847 "COMMENT": lambda self: self.expression( 848 exp.CommentColumnConstraint, this=self._parse_string() 849 ), 850 "COMPRESS": lambda self: self._parse_compress(), 851 "CLUSTERED": lambda self: self.expression( 852 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 853 ), 854 "NONCLUSTERED": lambda self: self.expression( 855 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 856 ), 857 "DEFAULT": lambda self: self.expression( 858 exp.DefaultColumnConstraint, this=self._parse_bitwise() 859 ), 860 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 861 "EPHEMERAL": lambda self: self.expression( 862 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 863 ), 864 "EXCLUDE": lambda self: self.expression( 865 exp.ExcludeColumnConstraint, this=self._parse_index_params() 866 ), 867 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 868 "FORMAT": lambda self: self.expression( 869 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 870 ), 871 "GENERATED": lambda self: self._parse_generated_as_identity(), 872 "IDENTITY": lambda self: self._parse_auto_increment(), 873 "INLINE": lambda self: self._parse_inline(), 874 "LIKE": lambda self: self._parse_create_like(), 875 "NOT": lambda self: self._parse_not_constraint(), 876 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 877 "ON": lambda self: ( 878 self._match(TokenType.UPDATE) 879 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 880 ) 881 or self.expression(exp.OnProperty, this=self._parse_id_var()), 882 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 883 "PERIOD": lambda self: self._parse_period_for_system_time(), 884 "PRIMARY KEY": lambda self: self._parse_primary_key(), 885 "REFERENCES": lambda self: self._parse_references(match=False), 886 "TITLE": lambda self: self.expression( 887 exp.TitleColumnConstraint, this=self._parse_var_or_string() 888 ), 889 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 890 "UNIQUE": lambda self: self._parse_unique(), 891 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 892 "WITH": lambda self: self.expression( 893 exp.Properties, expressions=self._parse_wrapped_properties() 894 ), 895 } 896 897 ALTER_PARSERS = { 898 "ADD": lambda self: self._parse_alter_table_add(), 899 "ALTER": lambda self: self._parse_alter_table_alter(), 900 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 901 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 902 "DROP": lambda self: self._parse_alter_table_drop(), 903 "RENAME": lambda self: self._parse_alter_table_rename(), 904 } 905 906 SCHEMA_UNNAMED_CONSTRAINTS = { 907 "CHECK", 908 
"EXCLUDE", 909 "FOREIGN KEY", 910 "LIKE", 911 "PERIOD", 912 "PRIMARY KEY", 913 "UNIQUE", 914 } 915 916 NO_PAREN_FUNCTION_PARSERS = { 917 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 918 "CASE": lambda self: self._parse_case(), 919 "IF": lambda self: self._parse_if(), 920 "NEXT": lambda self: self._parse_next_value_for(), 921 } 922 923 INVALID_FUNC_NAME_TOKENS = { 924 TokenType.IDENTIFIER, 925 TokenType.STRING, 926 } 927 928 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 929 930 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 931 932 FUNCTION_PARSERS = { 933 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 934 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 935 "DECODE": lambda self: self._parse_decode(), 936 "EXTRACT": lambda self: self._parse_extract(), 937 "JSON_OBJECT": lambda self: self._parse_json_object(), 938 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 939 "JSON_TABLE": lambda self: self._parse_json_table(), 940 "MATCH": lambda self: self._parse_match_against(), 941 "OPENJSON": lambda self: self._parse_open_json(), 942 "POSITION": lambda self: self._parse_position(), 943 "PREDICT": lambda self: self._parse_predict(), 944 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 945 "STRING_AGG": lambda self: self._parse_string_agg(), 946 "SUBSTRING": lambda self: self._parse_substring(), 947 "TRIM": lambda self: self._parse_trim(), 948 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 949 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 950 } 951 952 QUERY_MODIFIER_PARSERS = { 953 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 954 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 955 TokenType.WHERE: lambda self: ("where", self._parse_where()), 956 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 957 TokenType.HAVING: lambda self: ("having", self._parse_having()), 958 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 959 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 960 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 961 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 962 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 963 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 964 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 965 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 966 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 967 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 968 TokenType.CLUSTER_BY: lambda self: ( 969 "cluster", 970 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 971 ), 972 TokenType.DISTRIBUTE_BY: lambda self: ( 973 "distribute", 974 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 975 ), 976 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 977 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 978 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 979 } 980 981 SET_PARSERS = { 982 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 983 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 984 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 985 "TRANSACTION": lambda self: self._parse_set_transaction(), 
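# A hedged sketch of this dispatch: in "SET SESSION sql_mode = 'ANSI'" the
# "SESSION" prefix matches here and delegates to
# _parse_set_item_assignment("SESSION"); items with no recognized prefix
# appear to fall through to a plain assignment parse. For illustration,
# assuming the top-level sqlglot.parse_one helper:
#
#   >>> from sqlglot import parse_one
#   >>> parse_one("SET SESSION sql_mode = 'ANSI'", read="mysql")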
986 } 987 988 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 989 990 TYPE_LITERAL_PARSERS = { 991 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 992 } 993 994 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 995 996 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 997 998 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 999 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1000 "ISOLATION": ( 1001 ("LEVEL", "REPEATABLE", "READ"), 1002 ("LEVEL", "READ", "COMMITTED"), 1003 ("LEVEL", "READ", "UNCOMMITTED"), 1004 ("LEVEL", "SERIALIZABLE"), 1005 ), 1006 "READ": ("WRITE", "ONLY"), 1007 } 1008 1009 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1010 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1011 ) 1012 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1013 1014 CREATE_SEQUENCE: OPTIONS_TYPE = { 1015 "SCALE": ("EXTEND", "NOEXTEND"), 1016 "SHARD": ("EXTEND", "NOEXTEND"), 1017 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1018 **dict.fromkeys( 1019 ( 1020 "SESSION", 1021 "GLOBAL", 1022 "KEEP", 1023 "NOKEEP", 1024 "ORDER", 1025 "NOORDER", 1026 "NOCACHE", 1027 "CYCLE", 1028 "NOCYCLE", 1029 "NOMINVALUE", 1030 "NOMAXVALUE", 1031 "NOSCALE", 1032 "NOSHARD", 1033 ), 1034 tuple(), 1035 ), 1036 } 1037 1038 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1039 1040 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1041 1042 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1043 1044 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1045 1046 CLONE_KEYWORDS = {"CLONE", "COPY"} 1047 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1048 1049 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1050 1051 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1052 1053 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1054 1055 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1056 1057 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1058 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1059 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1060 1061 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1062 1063 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1064 1065 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1066 1067 DISTINCT_TOKENS = {TokenType.DISTINCT} 1068 1069 NULL_TOKENS = {TokenType.NULL} 1070 1071 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1072 1073 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1074 1075 STRICT_CAST = True 1076 1077 PREFIXED_PIVOT_COLUMNS = False 1078 IDENTIFY_PIVOT_STRINGS = False 1079 1080 LOG_DEFAULTS_TO_LN = False 1081 1082 # Whether ADD is present for each column added by ALTER TABLE 1083 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1084 1085 # Whether the table sample clause expects CSV syntax 1086 TABLESAMPLE_CSV = False 1087 1088 # Whether the SET command needs a delimiter (e.g.
"=") for assignments 1089 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1090 1091 # Whether the TRIM function expects the characters to trim as its first argument 1092 TRIM_PATTERN_FIRST = False 1093 1094 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1095 STRING_ALIASES = False 1096 1097 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1098 MODIFIERS_ATTACHED_TO_UNION = True 1099 UNION_MODIFIERS = {"order", "limit", "offset"} 1100 1101 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1102 NO_PAREN_IF_COMMANDS = True 1103 1104 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1105 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1106 1107 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1108 # If this is True and '(' is not found, the keyword will be treated as an identifier 1109 VALUES_FOLLOWED_BY_PAREN = True 1110 1111 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1112 SUPPORTS_IMPLICIT_UNNEST = False 1113 1114 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1115 INTERVAL_SPANS = True 1116 1117 # Whether a PARTITION clause can follow a table reference 1118 SUPPORTS_PARTITION_SELECTION = False 1119 1120 __slots__ = ( 1121 "error_level", 1122 "error_message_context", 1123 "max_errors", 1124 "dialect", 1125 "sql", 1126 "errors", 1127 "_tokens", 1128 "_index", 1129 "_curr", 1130 "_next", 1131 "_prev", 1132 "_prev_comments", 1133 ) 1134 1135 # Autofilled 1136 SHOW_TRIE: t.Dict = {} 1137 SET_TRIE: t.Dict = {} 1138 1139 def __init__( 1140 self, 1141 error_level: t.Optional[ErrorLevel] = None, 1142 error_message_context: int = 100, 1143 max_errors: int = 3, 1144 dialect: DialectType = None, 1145 ): 1146 from sqlglot.dialects import Dialect 1147 1148 self.error_level = error_level or ErrorLevel.IMMEDIATE 1149 self.error_message_context = error_message_context 1150 self.max_errors = max_errors 1151 self.dialect = Dialect.get_or_raise(dialect) 1152 self.reset() 1153 1154 def reset(self): 1155 self.sql = "" 1156 self.errors = [] 1157 self._tokens = [] 1158 self._index = 0 1159 self._curr = None 1160 self._next = None 1161 self._prev = None 1162 self._prev_comments = None 1163 1164 def parse( 1165 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1166 ) -> t.List[t.Optional[exp.Expression]]: 1167 """ 1168 Parses a list of tokens and returns a list of syntax trees, one tree 1169 per parsed SQL statement. 1170 1171 Args: 1172 raw_tokens: The list of tokens. 1173 sql: The original SQL string, used to produce helpful debug messages. 1174 1175 Returns: 1176 The list of the produced syntax trees. 1177 """ 1178 return self._parse( 1179 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1180 ) 1181 1182 def parse_into( 1183 self, 1184 expression_types: exp.IntoType, 1185 raw_tokens: t.List[Token], 1186 sql: t.Optional[str] = None, 1187 ) -> t.List[t.Optional[exp.Expression]]: 1188 """ 1189 Parses a list of tokens into a given Expression type. If a collection of Expression 1190 types is given instead, this method will try to parse the token list into each one 1191 of them, stopping at the first for which the parsing succeeds. 1192 1193 Args: 1194 expression_types: The expression type(s) to try and parse the token list into. 1195 raw_tokens: The list of tokens. 1196 sql: The original SQL string, used to produce helpful debug messages. 
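        Example (an illustrative sketch; the targetable types are the ones
            registered in EXPRESSION_PARSERS, and exact tree shapes vary by dialect):
            >>> from sqlglot.tokens import Tokenizer
            >>> tokens = Tokenizer().tokenize("SELECT 1")
            >>> Parser().parse_into(exp.Select, tokens)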
1197 1198 Returns: 1199 The target Expression. 1200 """ 1201 errors = [] 1202 for expression_type in ensure_list(expression_types): 1203 parser = self.EXPRESSION_PARSERS.get(expression_type) 1204 if not parser: 1205 raise TypeError(f"No parser registered for {expression_type}") 1206 1207 try: 1208 return self._parse(parser, raw_tokens, sql) 1209 except ParseError as e: 1210 e.errors[0]["into_expression"] = expression_type 1211 errors.append(e) 1212 1213 raise ParseError( 1214 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1215 errors=merge_errors(errors), 1216 ) from errors[-1] 1217 1218 def _parse( 1219 self, 1220 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1221 raw_tokens: t.List[Token], 1222 sql: t.Optional[str] = None, 1223 ) -> t.List[t.Optional[exp.Expression]]: 1224 self.reset() 1225 self.sql = sql or "" 1226 1227 total = len(raw_tokens) 1228 chunks: t.List[t.List[Token]] = [[]] 1229 1230 for i, token in enumerate(raw_tokens): 1231 if token.token_type == TokenType.SEMICOLON: 1232 if i < total - 1: 1233 chunks.append([]) 1234 else: 1235 chunks[-1].append(token) 1236 1237 expressions = [] 1238 1239 for tokens in chunks: 1240 self._index = -1 1241 self._tokens = tokens 1242 self._advance() 1243 1244 expressions.append(parse_method(self)) 1245 1246 if self._index < len(self._tokens): 1247 self.raise_error("Invalid expression / Unexpected token") 1248 1249 self.check_errors() 1250 1251 return expressions 1252 1253 def check_errors(self) -> None: 1254 """Logs or raises any found errors, depending on the chosen error level setting.""" 1255 if self.error_level == ErrorLevel.WARN: 1256 for error in self.errors: 1257 logger.error(str(error)) 1258 elif self.error_level == ErrorLevel.RAISE and self.errors: 1259 raise ParseError( 1260 concat_messages(self.errors, self.max_errors), 1261 errors=merge_errors(self.errors), 1262 ) 1263 1264 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1265 """ 1266 Appends an error to the list of recorded errors or raises it, depending on the chosen 1267 error level setting. 1268 """ 1269 token = token or self._curr or self._prev or Token.string("") 1270 start = token.start 1271 end = token.end + 1 1272 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1273 highlight = self.sql[start:end] 1274 end_context = self.sql[end : end + self.error_message_context] 1275 1276 error = ParseError.new( 1277 f"{message}. Line {token.line}, Col: {token.col}.\n" 1278 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1279 description=message, 1280 line=token.line, 1281 col=token.col, 1282 start_context=start_context, 1283 highlight=highlight, 1284 end_context=end_context, 1285 ) 1286 1287 if self.error_level == ErrorLevel.IMMEDIATE: 1288 raise error 1289 1290 self.errors.append(error) 1291 1292 def expression( 1293 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1294 ) -> E: 1295 """ 1296 Creates a new, validated Expression. 1297 1298 Args: 1299 exp_class: The expression class to instantiate. 1300 comments: An optional list of comments to attach to the expression. 1301 kwargs: The arguments to set for the expression along with their respective values. 1302 1303 Returns: 1304 The target expression.
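        Example (a sketch of typical internal usage; the kwargs depend on the
            node type being built):
            >>> # inside a parse method, a negation could be built as
            >>> # self.expression(exp.Not, this=self._parse_equality())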
""" 1306 instance = exp_class(**kwargs) 1307 instance.add_comments(comments) if comments else self._add_comments(instance) 1308 return self.validate_expression(instance) 1309 1310 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1311 if expression and self._prev_comments: 1312 expression.add_comments(self._prev_comments) 1313 self._prev_comments = None 1314 1315 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1316 """ 1317 Validates an Expression, making sure that all its mandatory arguments are set. 1318 1319 Args: 1320 expression: The expression to validate. 1321 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1322 1323 Returns: 1324 The validated expression. 1325 """ 1326 if self.error_level != ErrorLevel.IGNORE: 1327 for error_message in expression.error_messages(args): 1328 self.raise_error(error_message) 1329 1330 return expression 1331 1332 def _find_sql(self, start: Token, end: Token) -> str: 1333 return self.sql[start.start : end.end + 1] 1334 1335 def _is_connected(self) -> bool: 1336 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1337 1338 def _advance(self, times: int = 1) -> None: 1339 self._index += times 1340 self._curr = seq_get(self._tokens, self._index) 1341 self._next = seq_get(self._tokens, self._index + 1) 1342 1343 if self._index > 0: 1344 self._prev = self._tokens[self._index - 1] 1345 self._prev_comments = self._prev.comments 1346 else: 1347 self._prev = None 1348 self._prev_comments = None 1349 1350 def _retreat(self, index: int) -> None: 1351 if index != self._index: 1352 self._advance(index - self._index) 1353 1354 def _warn_unsupported(self) -> None: 1355 if len(self._tokens) <= 1: 1356 return 1357 1358 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1359 # interested in emitting a warning for the one currently being processed. 1360 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1361 1362 logger.warning( 1363 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1364 ) 1365 1366 def _parse_command(self) -> exp.Command: 1367 self._warn_unsupported() 1368 return self.expression( 1369 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1370 ) 1371 1372 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1373 """ 1374 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
This behavior can 1375 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1376 the parser state accordingly. 1377 """ 1378 index = self._index 1379 error_level = self.error_level 1380 1381 self.error_level = ErrorLevel.IMMEDIATE 1382 try: 1383 this = parse_method() 1384 except ParseError: 1385 this = None 1386 finally: 1387 if not this or retreat: 1388 self._retreat(index) 1389 self.error_level = error_level 1390 1391 return this 1392 1393 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1394 start = self._prev 1395 exists = self._parse_exists() if allow_exists else None 1396 1397 self._match(TokenType.ON) 1398 1399 materialized = self._match_text_seq("MATERIALIZED") 1400 kind = self._match_set(self.CREATABLES) and self._prev 1401 if not kind: 1402 return self._parse_as_command(start) 1403 1404 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1405 this = self._parse_user_defined_function(kind=kind.token_type) 1406 elif kind.token_type == TokenType.TABLE: 1407 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1408 elif kind.token_type == TokenType.COLUMN: 1409 this = self._parse_column() 1410 else: 1411 this = self._parse_id_var() 1412 1413 self._match(TokenType.IS) 1414 1415 return self.expression( 1416 exp.Comment, 1417 this=this, 1418 kind=kind.text, 1419 expression=self._parse_string(), 1420 exists=exists, 1421 materialized=materialized, 1422 ) 1423 1424 def _parse_to_table( 1425 self, 1426 ) -> exp.ToTableProperty: 1427 table = self._parse_table_parts(schema=True) 1428 return self.expression(exp.ToTableProperty, this=table) 1429 1430 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1431 def _parse_ttl(self) -> exp.Expression: 1432 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1433 this = self._parse_bitwise() 1434 1435 if self._match_text_seq("DELETE"): 1436 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1437 if self._match_text_seq("RECOMPRESS"): 1438 return self.expression( 1439 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1440 ) 1441 if self._match_text_seq("TO", "DISK"): 1442 return self.expression( 1443 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1444 ) 1445 if self._match_text_seq("TO", "VOLUME"): 1446 return self.expression( 1447 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1448 ) 1449 1450 return this 1451 1452 expressions = self._parse_csv(_parse_ttl_action) 1453 where = self._parse_where() 1454 group = self._parse_group() 1455 1456 aggregates = None 1457 if group and self._match(TokenType.SET): 1458 aggregates = self._parse_csv(self._parse_set_item) 1459 1460 return self.expression( 1461 exp.MergeTreeTTL, 1462 expressions=expressions, 1463 where=where, 1464 group=group, 1465 aggregates=aggregates, 1466 ) 1467 1468 def _parse_statement(self) -> t.Optional[exp.Expression]: 1469 if self._curr is None: 1470 return None 1471 1472 if self._match_set(self.STATEMENT_PARSERS): 1473 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1474 1475 if self._match_set(Tokenizer.COMMANDS): 1476 return self._parse_command() 1477 1478 expression = self._parse_expression() 1479 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1480 return self._parse_query_modifiers(expression) 1481 1482 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1483 start = self._prev 1484
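# Hedged walk-through: given "DROP TEMPORARY TABLE IF EXISTS t CASCADE",
# the TEMPORARY/MATERIALIZED flags are consumed below, the CREATABLES match
# yields kind="TABLE", _parse_exists() eats IF EXISTS, and the trailing
# CASCADE/CONSTRAINTS/PURGE keywords land as booleans on the exp.Drop node.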
temporary = self._match(TokenType.TEMPORARY) 1485 materialized = self._match_text_seq("MATERIALIZED") 1486 1487 kind = self._match_set(self.CREATABLES) and self._prev.text 1488 if not kind: 1489 return self._parse_as_command(start) 1490 1491 if_exists = exists or self._parse_exists() 1492 table = self._parse_table_parts( 1493 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1494 ) 1495 1496 if self._match(TokenType.L_PAREN, advance=False): 1497 expressions = self._parse_wrapped_csv(self._parse_types) 1498 else: 1499 expressions = None 1500 1501 return self.expression( 1502 exp.Drop, 1503 comments=start.comments, 1504 exists=if_exists, 1505 this=table, 1506 expressions=expressions, 1507 kind=kind, 1508 temporary=temporary, 1509 materialized=materialized, 1510 cascade=self._match_text_seq("CASCADE"), 1511 constraints=self._match_text_seq("CONSTRAINTS"), 1512 purge=self._match_text_seq("PURGE"), 1513 ) 1514 1515 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1516 return ( 1517 self._match_text_seq("IF") 1518 and (not not_ or self._match(TokenType.NOT)) 1519 and self._match(TokenType.EXISTS) 1520 ) 1521 1522 def _parse_create(self) -> exp.Create | exp.Command: 1523 # Note: this can't be None because we've matched a statement parser 1524 start = self._prev 1525 comments = self._prev_comments 1526 1527 replace = ( 1528 start.token_type == TokenType.REPLACE 1529 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1530 or self._match_pair(TokenType.OR, TokenType.ALTER) 1531 ) 1532 1533 unique = self._match(TokenType.UNIQUE) 1534 1535 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1536 self._advance() 1537 1538 properties = None 1539 create_token = self._match_set(self.CREATABLES) and self._prev 1540 1541 if not create_token: 1542 # exp.Properties.Location.POST_CREATE 1543 properties = self._parse_properties() 1544 create_token = self._match_set(self.CREATABLES) and self._prev 1545 1546 if not properties or not create_token: 1547 return self._parse_as_command(start) 1548 1549 exists = self._parse_exists(not_=True) 1550 this = None 1551 expression: t.Optional[exp.Expression] = None 1552 indexes = None 1553 no_schema_binding = None 1554 begin = None 1555 end = None 1556 clone = None 1557 1558 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1559 nonlocal properties 1560 if properties and temp_props: 1561 properties.expressions.extend(temp_props.expressions) 1562 elif temp_props: 1563 properties = temp_props 1564 1565 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1566 this = self._parse_user_defined_function(kind=create_token.token_type) 1567 1568 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1569 extend_props(self._parse_properties()) 1570 1571 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1572 1573 if not expression: 1574 if self._match(TokenType.COMMAND): 1575 expression = self._parse_as_command(self._prev) 1576 else: 1577 begin = self._match(TokenType.BEGIN) 1578 return_ = self._match_text_seq("RETURN") 1579 1580 if self._match(TokenType.STRING, advance=False): 1581 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1582 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1583 expression = self._parse_string() 1584 extend_props(self._parse_properties()) 1585 else: 1586 expression = self._parse_statement() 1587 1588 end =
self._match_text_seq("END") 1589 1590 if return_: 1591 expression = self.expression(exp.Return, this=expression) 1592 elif create_token.token_type == TokenType.INDEX: 1593 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1594 if not self._match(TokenType.ON): 1595 index = self._parse_id_var() 1596 anonymous = False 1597 else: 1598 index = None 1599 anonymous = True 1600 1601 this = self._parse_index(index=index, anonymous=anonymous) 1602 elif create_token.token_type in self.DB_CREATABLES: 1603 table_parts = self._parse_table_parts( 1604 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1605 ) 1606 1607 # exp.Properties.Location.POST_NAME 1608 self._match(TokenType.COMMA) 1609 extend_props(self._parse_properties(before=True)) 1610 1611 this = self._parse_schema(this=table_parts) 1612 1613 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1614 extend_props(self._parse_properties()) 1615 1616 self._match(TokenType.ALIAS) 1617 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1618 # exp.Properties.Location.POST_ALIAS 1619 extend_props(self._parse_properties()) 1620 1621 if create_token.token_type == TokenType.SEQUENCE: 1622 expression = self._parse_types() 1623 extend_props(self._parse_properties()) 1624 else: 1625 expression = self._parse_ddl_select() 1626 1627 if create_token.token_type == TokenType.TABLE: 1628 # exp.Properties.Location.POST_EXPRESSION 1629 extend_props(self._parse_properties()) 1630 1631 indexes = [] 1632 while True: 1633 index = self._parse_index() 1634 1635 # exp.Properties.Location.POST_INDEX 1636 extend_props(self._parse_properties()) 1637 1638 if not index: 1639 break 1640 else: 1641 self._match(TokenType.COMMA) 1642 indexes.append(index) 1643 elif create_token.token_type == TokenType.VIEW: 1644 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1645 no_schema_binding = True 1646 1647 shallow = self._match_text_seq("SHALLOW") 1648 1649 if self._match_texts(self.CLONE_KEYWORDS): 1650 copy = self._prev.text.lower() == "copy" 1651 clone = self.expression( 1652 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1653 ) 1654 1655 if self._curr: 1656 return self._parse_as_command(start) 1657 1658 return self.expression( 1659 exp.Create, 1660 comments=comments, 1661 this=this, 1662 kind=create_token.text.upper(), 1663 replace=replace, 1664 unique=unique, 1665 expression=expression, 1666 exists=exists, 1667 properties=properties, 1668 indexes=indexes, 1669 no_schema_binding=no_schema_binding, 1670 begin=begin, 1671 end=end, 1672 clone=clone, 1673 ) 1674 1675 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1676 seq = exp.SequenceProperties() 1677 1678 options = [] 1679 index = self._index 1680 1681 while self._curr: 1682 if self._match_text_seq("INCREMENT"): 1683 self._match_text_seq("BY") 1684 self._match_text_seq("=") 1685 seq.set("increment", self._parse_term()) 1686 elif self._match_text_seq("MINVALUE"): 1687 seq.set("minvalue", self._parse_term()) 1688 elif self._match_text_seq("MAXVALUE"): 1689 seq.set("maxvalue", self._parse_term()) 1690 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1691 self._match_text_seq("=") 1692 seq.set("start", self._parse_term()) 1693 elif self._match_text_seq("CACHE"): 1694 # T-SQL allows empty CACHE which is initialized dynamically 1695 seq.set("cache", self._parse_number() or True) 1696 elif self._match_text_seq("OWNED", "BY"): 1697 # "OWNED BY NONE" is the default 1698 seq.set("owned", None if
self._match_text_seq("NONE") else self._parse_column()) 1699 else: 1700 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1701 if opt: 1702 options.append(opt) 1703 else: 1704 break 1705 1706 seq.set("options", options if options else None) 1707 return None if self._index == index else seq 1708 1709 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1710 # only used for teradata currently 1711 self._match(TokenType.COMMA) 1712 1713 kwargs = { 1714 "no": self._match_text_seq("NO"), 1715 "dual": self._match_text_seq("DUAL"), 1716 "before": self._match_text_seq("BEFORE"), 1717 "default": self._match_text_seq("DEFAULT"), 1718 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1719 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1720 "after": self._match_text_seq("AFTER"), 1721 "minimum": self._match_texts(("MIN", "MINIMUM")), 1722 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1723 } 1724 1725 if self._match_texts(self.PROPERTY_PARSERS): 1726 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1727 try: 1728 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1729 except TypeError: 1730 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1731 1732 return None 1733 1734 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1735 return self._parse_wrapped_csv(self._parse_property) 1736 1737 def _parse_property(self) -> t.Optional[exp.Expression]: 1738 if self._match_texts(self.PROPERTY_PARSERS): 1739 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1740 1741 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1742 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1743 1744 if self._match_text_seq("COMPOUND", "SORTKEY"): 1745 return self._parse_sortkey(compound=True) 1746 1747 if self._match_text_seq("SQL", "SECURITY"): 1748 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1749 1750 index = self._index 1751 key = self._parse_column() 1752 1753 if not self._match(TokenType.EQ): 1754 self._retreat(index) 1755 return self._parse_sequence_properties() 1756 1757 return self.expression( 1758 exp.Property, 1759 this=key.to_dot() if isinstance(key, exp.Column) else key, 1760 value=self._parse_bitwise() or self._parse_var(any_token=True), 1761 ) 1762 1763 def _parse_stored(self) -> exp.FileFormatProperty: 1764 self._match(TokenType.ALIAS) 1765 1766 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1767 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1768 1769 return self.expression( 1770 exp.FileFormatProperty, 1771 this=( 1772 self.expression( 1773 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1774 ) 1775 if input_format or output_format 1776 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1777 ), 1778 ) 1779 1780 def _parse_unquoted_field(self): 1781 field = self._parse_field() 1782 if isinstance(field, exp.Identifier) and not field.quoted: 1783 field = exp.var(field) 1784 1785 return field 1786 1787 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1788 self._match(TokenType.EQ) 1789 self._match(TokenType.ALIAS) 1790 1791 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1792 1793 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1794 properties = [] 1795 while True: 1796 
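# Hedged sketch of this loop: each pass consumes one property (or, when
# before=True, one Teradata-style pre-name property) until nothing matches;
# e.g. "ENGINE=InnoDB COMMENT 'x'" should yield two nodes, which get
# wrapped in a single exp.Properties below.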
if before: 1797 prop = self._parse_property_before() 1798 else: 1799 prop = self._parse_property() 1800 if not prop: 1801 break 1802 for p in ensure_list(prop): 1803 properties.append(p) 1804 1805 if properties: 1806 return self.expression(exp.Properties, expressions=properties) 1807 1808 return None 1809 1810 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1811 return self.expression( 1812 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1813 ) 1814 1815 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1816 if self._index >= 2: 1817 pre_volatile_token = self._tokens[self._index - 2] 1818 else: 1819 pre_volatile_token = None 1820 1821 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1822 return exp.VolatileProperty() 1823 1824 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1825 1826 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1827 self._match_pair(TokenType.EQ, TokenType.ON) 1828 1829 prop = self.expression(exp.WithSystemVersioningProperty) 1830 if self._match(TokenType.L_PAREN): 1831 self._match_text_seq("HISTORY_TABLE", "=") 1832 prop.set("this", self._parse_table_parts()) 1833 1834 if self._match(TokenType.COMMA): 1835 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1836 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1837 1838 self._match_r_paren() 1839 1840 return prop 1841 1842 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1843 if self._match(TokenType.L_PAREN, advance=False): 1844 return self._parse_wrapped_properties() 1845 1846 if self._match_text_seq("JOURNAL"): 1847 return self._parse_withjournaltable() 1848 1849 if self._match_texts(self.VIEW_ATTRIBUTES): 1850 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1851 1852 if self._match_text_seq("DATA"): 1853 return self._parse_withdata(no=False) 1854 elif self._match_text_seq("NO", "DATA"): 1855 return self._parse_withdata(no=True) 1856 1857 if not self._next: 1858 return None 1859 1860 return self._parse_withisolatedloading() 1861 1862 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1863 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1864 self._match(TokenType.EQ) 1865 1866 user = self._parse_id_var() 1867 self._match(TokenType.PARAMETER) 1868 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1869 1870 if not user or not host: 1871 return None 1872 1873 return exp.DefinerProperty(this=f"{user}@{host}") 1874 1875 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1876 self._match(TokenType.TABLE) 1877 self._match(TokenType.EQ) 1878 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1879 1880 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1881 return self.expression(exp.LogProperty, no=no) 1882 1883 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1884 return self.expression(exp.JournalProperty, **kwargs) 1885 1886 def _parse_checksum(self) -> exp.ChecksumProperty: 1887 self._match(TokenType.EQ) 1888 1889 on = None 1890 if self._match(TokenType.ON): 1891 on = True 1892 elif self._match_text_seq("OFF"): 1893 on = False 1894 1895 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1896 1897 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1898 return self.expression( 
1899 exp.Cluster, 1900 expressions=( 1901 self._parse_wrapped_csv(self._parse_ordered) 1902 if wrapped 1903 else self._parse_csv(self._parse_ordered) 1904 ), 1905 ) 1906 1907 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1908 self._match_text_seq("BY") 1909 1910 self._match_l_paren() 1911 expressions = self._parse_csv(self._parse_column) 1912 self._match_r_paren() 1913 1914 if self._match_text_seq("SORTED", "BY"): 1915 self._match_l_paren() 1916 sorted_by = self._parse_csv(self._parse_ordered) 1917 self._match_r_paren() 1918 else: 1919 sorted_by = None 1920 1921 self._match(TokenType.INTO) 1922 buckets = self._parse_number() 1923 self._match_text_seq("BUCKETS") 1924 1925 return self.expression( 1926 exp.ClusteredByProperty, 1927 expressions=expressions, 1928 sorted_by=sorted_by, 1929 buckets=buckets, 1930 ) 1931 1932 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1933 if not self._match_text_seq("GRANTS"): 1934 self._retreat(self._index - 1) 1935 return None 1936 1937 return self.expression(exp.CopyGrantsProperty) 1938 1939 def _parse_freespace(self) -> exp.FreespaceProperty: 1940 self._match(TokenType.EQ) 1941 return self.expression( 1942 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1943 ) 1944 1945 def _parse_mergeblockratio( 1946 self, no: bool = False, default: bool = False 1947 ) -> exp.MergeBlockRatioProperty: 1948 if self._match(TokenType.EQ): 1949 return self.expression( 1950 exp.MergeBlockRatioProperty, 1951 this=self._parse_number(), 1952 percent=self._match(TokenType.PERCENT), 1953 ) 1954 1955 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1956 1957 def _parse_datablocksize( 1958 self, 1959 default: t.Optional[bool] = None, 1960 minimum: t.Optional[bool] = None, 1961 maximum: t.Optional[bool] = None, 1962 ) -> exp.DataBlocksizeProperty: 1963 self._match(TokenType.EQ) 1964 size = self._parse_number() 1965 1966 units = None 1967 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1968 units = self._prev.text 1969 1970 return self.expression( 1971 exp.DataBlocksizeProperty, 1972 size=size, 1973 units=units, 1974 default=default, 1975 minimum=minimum, 1976 maximum=maximum, 1977 ) 1978 1979 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1980 self._match(TokenType.EQ) 1981 always = self._match_text_seq("ALWAYS") 1982 manual = self._match_text_seq("MANUAL") 1983 never = self._match_text_seq("NEVER") 1984 default = self._match_text_seq("DEFAULT") 1985 1986 autotemp = None 1987 if self._match_text_seq("AUTOTEMP"): 1988 autotemp = self._parse_schema() 1989 1990 return self.expression( 1991 exp.BlockCompressionProperty, 1992 always=always, 1993 manual=manual, 1994 never=never, 1995 default=default, 1996 autotemp=autotemp, 1997 ) 1998 1999 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2000 index = self._index 2001 no = self._match_text_seq("NO") 2002 concurrent = self._match_text_seq("CONCURRENT") 2003 2004 if not self._match_text_seq("ISOLATED", "LOADING"): 2005 self._retreat(index) 2006 return None 2007 2008 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2009 return self.expression( 2010 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2011 ) 2012 2013 def _parse_locking(self) -> exp.LockingProperty: 2014 if self._match(TokenType.TABLE): 2015 kind = "TABLE" 2016 elif self._match(TokenType.VIEW): 2017 kind = "VIEW" 2018 elif self._match(TokenType.ROW): 2019 
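# e.g. Teradata's "LOCKING ROW FOR ACCESS" takes this branch (a hedged
# reading of the grammar): kind is recorded here, then the FOR/IN keyword
# and the lock type (ACCESS, SHARE, READ, ...) are matched piecewise below.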
kind = "ROW" 2020 elif self._match_text_seq("DATABASE"): 2021 kind = "DATABASE" 2022 else: 2023 kind = None 2024 2025 if kind in ("DATABASE", "TABLE", "VIEW"): 2026 this = self._parse_table_parts() 2027 else: 2028 this = None 2029 2030 if self._match(TokenType.FOR): 2031 for_or_in = "FOR" 2032 elif self._match(TokenType.IN): 2033 for_or_in = "IN" 2034 else: 2035 for_or_in = None 2036 2037 if self._match_text_seq("ACCESS"): 2038 lock_type = "ACCESS" 2039 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2040 lock_type = "EXCLUSIVE" 2041 elif self._match_text_seq("SHARE"): 2042 lock_type = "SHARE" 2043 elif self._match_text_seq("READ"): 2044 lock_type = "READ" 2045 elif self._match_text_seq("WRITE"): 2046 lock_type = "WRITE" 2047 elif self._match_text_seq("CHECKSUM"): 2048 lock_type = "CHECKSUM" 2049 else: 2050 lock_type = None 2051 2052 override = self._match_text_seq("OVERRIDE") 2053 2054 return self.expression( 2055 exp.LockingProperty, 2056 this=this, 2057 kind=kind, 2058 for_or_in=for_or_in, 2059 lock_type=lock_type, 2060 override=override, 2061 ) 2062 2063 def _parse_partition_by(self) -> t.List[exp.Expression]: 2064 if self._match(TokenType.PARTITION_BY): 2065 return self._parse_csv(self._parse_conjunction) 2066 return [] 2067 2068 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2069 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2070 if self._match_text_seq("MINVALUE"): 2071 return exp.var("MINVALUE") 2072 if self._match_text_seq("MAXVALUE"): 2073 return exp.var("MAXVALUE") 2074 return self._parse_bitwise() 2075 2076 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2077 expression = None 2078 from_expressions = None 2079 to_expressions = None 2080 2081 if self._match(TokenType.IN): 2082 this = self._parse_wrapped_csv(self._parse_bitwise) 2083 elif self._match(TokenType.FROM): 2084 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2085 self._match_text_seq("TO") 2086 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2087 elif self._match_text_seq("WITH", "(", "MODULUS"): 2088 this = self._parse_number() 2089 self._match_text_seq(",", "REMAINDER") 2090 expression = self._parse_number() 2091 self._match_r_paren() 2092 else: 2093 self.raise_error("Failed to parse partition bound spec.") 2094 2095 return self.expression( 2096 exp.PartitionBoundSpec, 2097 this=this, 2098 expression=expression, 2099 from_expressions=from_expressions, 2100 to_expressions=to_expressions, 2101 ) 2102 2103 # https://www.postgresql.org/docs/current/sql-createtable.html 2104 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2105 if not self._match_text_seq("OF"): 2106 self._retreat(self._index - 1) 2107 return None 2108 2109 this = self._parse_table(schema=True) 2110 2111 if self._match(TokenType.DEFAULT): 2112 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2113 elif self._match_text_seq("FOR", "VALUES"): 2114 expression = self._parse_partition_bound_spec() 2115 else: 2116 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2117 2118 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2119 2120 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2121 self._match(TokenType.EQ) 2122 return self.expression( 2123 exp.PartitionedByProperty, 2124 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2125 ) 2126 2127 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2128 if 
self._match_text_seq("AND", "STATISTICS"): 2129 statistics = True 2130 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2131 statistics = False 2132 else: 2133 statistics = None 2134 2135 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2136 2137 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2138 if self._match_text_seq("SQL"): 2139 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2140 return None 2141 2142 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2143 if self._match_text_seq("SQL", "DATA"): 2144 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2145 return None 2146 2147 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2148 if self._match_text_seq("PRIMARY", "INDEX"): 2149 return exp.NoPrimaryIndexProperty() 2150 if self._match_text_seq("SQL"): 2151 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2152 return None 2153 2154 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2155 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2156 return exp.OnCommitProperty() 2157 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2158 return exp.OnCommitProperty(delete=True) 2159 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2160 2161 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2162 if self._match_text_seq("SQL", "DATA"): 2163 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2164 return None 2165 2166 def _parse_distkey(self) -> exp.DistKeyProperty: 2167 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2168 2169 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2170 table = self._parse_table(schema=True) 2171 2172 options = [] 2173 while self._match_texts(("INCLUDING", "EXCLUDING")): 2174 this = self._prev.text.upper() 2175 2176 id_var = self._parse_id_var() 2177 if not id_var: 2178 return None 2179 2180 options.append( 2181 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2182 ) 2183 2184 return self.expression(exp.LikeProperty, this=table, expressions=options) 2185 2186 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2187 return self.expression( 2188 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2189 ) 2190 2191 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2192 self._match(TokenType.EQ) 2193 return self.expression( 2194 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2195 ) 2196 2197 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2198 self._match_text_seq("WITH", "CONNECTION") 2199 return self.expression( 2200 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2201 ) 2202 2203 def _parse_returns(self) -> exp.ReturnsProperty: 2204 value: t.Optional[exp.Expression] 2205 is_table = self._match(TokenType.TABLE) 2206 2207 if is_table: 2208 if self._match(TokenType.LT): 2209 value = self.expression( 2210 exp.Schema, 2211 this="TABLE", 2212 expressions=self._parse_csv(self._parse_struct_types), 2213 ) 2214 if not self._match(TokenType.GT): 2215 self.raise_error("Expecting >") 2216 else: 2217 value = self._parse_schema(exp.var("TABLE")) 2218 else: 2219 value = self._parse_types() 2220 2221 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2222 2223 def 
_parse_describe(self) -> exp.Describe: 2224 kind = self._match_set(self.CREATABLES) and self._prev.text 2225 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2226 if self._match(TokenType.DOT): 2227 style = None 2228 self._retreat(self._index - 2) 2229 this = self._parse_table(schema=True) 2230 properties = self._parse_properties() 2231 expressions = properties.expressions if properties else None 2232 return self.expression( 2233 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2234 ) 2235 2236 def _parse_insert(self) -> exp.Insert: 2237 comments = ensure_list(self._prev_comments) 2238 hint = self._parse_hint() 2239 overwrite = self._match(TokenType.OVERWRITE) 2240 ignore = self._match(TokenType.IGNORE) 2241 local = self._match_text_seq("LOCAL") 2242 alternative = None 2243 is_function = None 2244 2245 if self._match_text_seq("DIRECTORY"): 2246 this: t.Optional[exp.Expression] = self.expression( 2247 exp.Directory, 2248 this=self._parse_var_or_string(), 2249 local=local, 2250 row_format=self._parse_row_format(match_row=True), 2251 ) 2252 else: 2253 if self._match(TokenType.OR): 2254 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2255 2256 self._match(TokenType.INTO) 2257 comments += ensure_list(self._prev_comments) 2258 self._match(TokenType.TABLE) 2259 is_function = self._match(TokenType.FUNCTION) 2260 2261 this = ( 2262 self._parse_table(schema=True, parse_partition=True) 2263 if not is_function 2264 else self._parse_function() 2265 ) 2266 2267 returning = self._parse_returning() 2268 2269 return self.expression( 2270 exp.Insert, 2271 comments=comments, 2272 hint=hint, 2273 is_function=is_function, 2274 this=this, 2275 stored=self._match_text_seq("STORED") and self._parse_stored(), 2276 by_name=self._match_text_seq("BY", "NAME"), 2277 exists=self._parse_exists(), 2278 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2279 and self._parse_conjunction(), 2280 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2281 conflict=self._parse_on_conflict(), 2282 returning=returning or self._parse_returning(), 2283 overwrite=overwrite, 2284 alternative=alternative, 2285 ignore=ignore, 2286 ) 2287 2288 def _parse_kill(self) -> exp.Kill: 2289 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2290 2291 return self.expression( 2292 exp.Kill, 2293 this=self._parse_primary(), 2294 kind=kind, 2295 ) 2296 2297 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2298 conflict = self._match_text_seq("ON", "CONFLICT") 2299 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2300 2301 if not conflict and not duplicate: 2302 return None 2303 2304 conflict_keys = None 2305 constraint = None 2306 2307 if conflict: 2308 if self._match_text_seq("ON", "CONSTRAINT"): 2309 constraint = self._parse_id_var() 2310 elif self._match(TokenType.L_PAREN): 2311 conflict_keys = self._parse_csv(self._parse_id_var) 2312 self._match_r_paren() 2313 2314 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2315 if self._prev.token_type == TokenType.UPDATE: 2316 self._match(TokenType.SET) 2317 expressions = self._parse_csv(self._parse_equality) 2318 else: 2319 expressions = None 2320 2321 return self.expression( 2322 exp.OnConflict, 2323 duplicate=duplicate, 2324 expressions=expressions, 2325 action=action, 2326 conflict_keys=conflict_keys, 2327 constraint=constraint, 2328 ) 2329 2330 def _parse_returning(self) -> t.Optional[exp.Returning]: 2331 
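# Hedged example: "INSERT INTO t (x) VALUES (1) RETURNING id" yields an
# exp.Returning with one expression; the optional "INTO <target>" tail
# (e.g. Oracle-style RETURNING ... INTO) is captured only when present.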
if not self._match(TokenType.RETURNING): 2332 return None 2333 return self.expression( 2334 exp.Returning, 2335 expressions=self._parse_csv(self._parse_expression), 2336 into=self._match(TokenType.INTO) and self._parse_table_part(), 2337 ) 2338 2339 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2340 if not self._match(TokenType.FORMAT): 2341 return None 2342 return self._parse_row_format() 2343 2344 def _parse_row_format( 2345 self, match_row: bool = False 2346 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2347 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2348 return None 2349 2350 if self._match_text_seq("SERDE"): 2351 this = self._parse_string() 2352 2353 serde_properties = None 2354 if self._match(TokenType.SERDE_PROPERTIES): 2355 serde_properties = self.expression( 2356 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2357 ) 2358 2359 return self.expression( 2360 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2361 ) 2362 2363 self._match_text_seq("DELIMITED") 2364 2365 kwargs = {} 2366 2367 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2368 kwargs["fields"] = self._parse_string() 2369 if self._match_text_seq("ESCAPED", "BY"): 2370 kwargs["escaped"] = self._parse_string() 2371 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2372 kwargs["collection_items"] = self._parse_string() 2373 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2374 kwargs["map_keys"] = self._parse_string() 2375 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2376 kwargs["lines"] = self._parse_string() 2377 if self._match_text_seq("NULL", "DEFINED", "AS"): 2378 kwargs["null"] = self._parse_string() 2379 2380 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2381 2382 def _parse_load(self) -> exp.LoadData | exp.Command: 2383 if self._match_text_seq("DATA"): 2384 local = self._match_text_seq("LOCAL") 2385 self._match_text_seq("INPATH") 2386 inpath = self._parse_string() 2387 overwrite = self._match(TokenType.OVERWRITE) 2388 self._match_pair(TokenType.INTO, TokenType.TABLE) 2389 2390 return self.expression( 2391 exp.LoadData, 2392 this=self._parse_table(schema=True), 2393 local=local, 2394 overwrite=overwrite, 2395 inpath=inpath, 2396 partition=self._parse_partition(), 2397 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2398 serde=self._match_text_seq("SERDE") and self._parse_string(), 2399 ) 2400 return self._parse_as_command(self._prev) 2401 2402 def _parse_delete(self) -> exp.Delete: 2403 # This handles MySQL's "Multiple-Table Syntax" 2404 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2405 tables = None 2406 comments = self._prev_comments 2407 if not self._match(TokenType.FROM, advance=False): 2408 tables = self._parse_csv(self._parse_table) or None 2409 2410 returning = self._parse_returning() 2411 2412 return self.expression( 2413 exp.Delete, 2414 comments=comments, 2415 tables=tables, 2416 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2417 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2418 where=self._parse_where(), 2419 returning=returning or self._parse_returning(), 2420 limit=self._parse_limit(), 2421 ) 2422 2423 def _parse_update(self) -> exp.Update: 2424 comments = self._prev_comments 2425 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2426 expressions = 
self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2427 returning = self._parse_returning() 2428 return self.expression( 2429 exp.Update, 2430 comments=comments, 2431 **{ # type: ignore 2432 "this": this, 2433 "expressions": expressions, 2434 "from": self._parse_from(joins=True), 2435 "where": self._parse_where(), 2436 "returning": returning or self._parse_returning(), 2437 "order": self._parse_order(), 2438 "limit": self._parse_limit(), 2439 }, 2440 ) 2441 2442 def _parse_uncache(self) -> exp.Uncache: 2443 if not self._match(TokenType.TABLE): 2444 self.raise_error("Expecting TABLE after UNCACHE") 2445 2446 return self.expression( 2447 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2448 ) 2449 2450 def _parse_cache(self) -> exp.Cache: 2451 lazy = self._match_text_seq("LAZY") 2452 self._match(TokenType.TABLE) 2453 table = self._parse_table(schema=True) 2454 2455 options = [] 2456 if self._match_text_seq("OPTIONS"): 2457 self._match_l_paren() 2458 k = self._parse_string() 2459 self._match(TokenType.EQ) 2460 v = self._parse_string() 2461 options = [k, v] 2462 self._match_r_paren() 2463 2464 self._match(TokenType.ALIAS) 2465 return self.expression( 2466 exp.Cache, 2467 this=table, 2468 lazy=lazy, 2469 options=options, 2470 expression=self._parse_select(nested=True), 2471 ) 2472 2473 def _parse_partition(self) -> t.Optional[exp.Partition]: 2474 if not self._match(TokenType.PARTITION): 2475 return None 2476 2477 return self.expression( 2478 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2479 ) 2480 2481 def _parse_value(self) -> t.Optional[exp.Tuple]: 2482 if self._match(TokenType.L_PAREN): 2483 expressions = self._parse_csv(self._parse_expression) 2484 self._match_r_paren() 2485 return self.expression(exp.Tuple, expressions=expressions) 2486 2487 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
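# ---------------------------------------------------------------------------
# Editorial aside (not part of the parser source): each parenthesized VALUES
# row is parsed by _parse_value into an exp.Tuple; a quick sketch with
# illustrative SQL against the default dialect. The paren-less single-column
# row mentioned in the comment above is handled by the fallthrough that
# follows this example.
from sqlglot import exp, parse_one

select = parse_one("SELECT * FROM (VALUES (1, 2), (3, 4)) AS t(a, b)")
rows = select.find(exp.Values).expressions
assert len(rows) == 2 and all(isinstance(row, exp.Tuple) for row in rows)
# ---------------------------------------------------------------------------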
2488 expression = self._parse_expression() 2489 if expression: 2490 return self.expression(exp.Tuple, expressions=[expression]) 2491 return None 2492 2493 def _parse_projections(self) -> t.List[exp.Expression]: 2494 return self._parse_expressions() 2495 2496 def _parse_select( 2497 self, 2498 nested: bool = False, 2499 table: bool = False, 2500 parse_subquery_alias: bool = True, 2501 parse_set_operation: bool = True, 2502 ) -> t.Optional[exp.Expression]: 2503 cte = self._parse_with() 2504 2505 if cte: 2506 this = self._parse_statement() 2507 2508 if not this: 2509 self.raise_error("Failed to parse any statement following CTE") 2510 return cte 2511 2512 if "with" in this.arg_types: 2513 this.set("with", cte) 2514 else: 2515 self.raise_error(f"{this.key} does not support CTE") 2516 this = cte 2517 2518 return this 2519 2520 # duckdb supports leading with FROM x 2521 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2522 2523 if self._match(TokenType.SELECT): 2524 comments = self._prev_comments 2525 2526 hint = self._parse_hint() 2527 all_ = self._match(TokenType.ALL) 2528 distinct = self._match_set(self.DISTINCT_TOKENS) 2529 2530 kind = ( 2531 self._match(TokenType.ALIAS) 2532 and self._match_texts(("STRUCT", "VALUE")) 2533 and self._prev.text.upper() 2534 ) 2535 2536 if distinct: 2537 distinct = self.expression( 2538 exp.Distinct, 2539 on=self._parse_value() if self._match(TokenType.ON) else None, 2540 ) 2541 2542 if all_ and distinct: 2543 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2544 2545 limit = self._parse_limit(top=True) 2546 projections = self._parse_projections() 2547 2548 this = self.expression( 2549 exp.Select, 2550 kind=kind, 2551 hint=hint, 2552 distinct=distinct, 2553 expressions=projections, 2554 limit=limit, 2555 ) 2556 this.comments = comments 2557 2558 into = self._parse_into() 2559 if into: 2560 this.set("into", into) 2561 2562 if not from_: 2563 from_ = self._parse_from() 2564 2565 if from_: 2566 this.set("from", from_) 2567 2568 this = self._parse_query_modifiers(this) 2569 elif (table or nested) and self._match(TokenType.L_PAREN): 2570 if self._match(TokenType.PIVOT): 2571 this = self._parse_simplified_pivot() 2572 elif self._match(TokenType.FROM): 2573 this = exp.select("*").from_( 2574 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2575 ) 2576 else: 2577 this = ( 2578 self._parse_table() 2579 if table 2580 else self._parse_select(nested=True, parse_set_operation=False) 2581 ) 2582 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2583 2584 self._match_r_paren() 2585 2586 # We return early here so that the UNION isn't attached to the subquery by the 2587 # following call to _parse_set_operations, but instead becomes the parent node 2588 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2589 elif self._match(TokenType.VALUES, advance=False): 2590 this = self._parse_derived_table_values() 2591 elif from_: 2592 this = exp.select("*").from_(from_.this, copy=False) 2593 else: 2594 this = None 2595 2596 if parse_set_operation: 2597 return self._parse_set_operations(this) 2598 return this 2599 2600 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2601 if not skip_with_token and not self._match(TokenType.WITH): 2602 return None 2603 2604 comments = self._prev_comments 2605 recursive = self._match(TokenType.RECURSIVE) 2606 2607 expressions = [] 2608 while True: 2609 expressions.append(self._parse_cte()) 2610 2611 if not 
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2612 break 2613 else: 2614 self._match(TokenType.WITH) 2615 2616 return self.expression( 2617 exp.With, comments=comments, expressions=expressions, recursive=recursive 2618 ) 2619 2620 def _parse_cte(self) -> exp.CTE: 2621 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2622 if not alias or not alias.this: 2623 self.raise_error("Expected CTE to have alias") 2624 2625 self._match(TokenType.ALIAS) 2626 2627 if self._match_text_seq("NOT", "MATERIALIZED"): 2628 materialized = False 2629 elif self._match_text_seq("MATERIALIZED"): 2630 materialized = True 2631 else: 2632 materialized = None 2633 2634 return self.expression( 2635 exp.CTE, 2636 this=self._parse_wrapped(self._parse_statement), 2637 alias=alias, 2638 materialized=materialized, 2639 ) 2640 2641 def _parse_table_alias( 2642 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2643 ) -> t.Optional[exp.TableAlias]: 2644 any_token = self._match(TokenType.ALIAS) 2645 alias = ( 2646 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2647 or self._parse_string_as_identifier() 2648 ) 2649 2650 index = self._index 2651 if self._match(TokenType.L_PAREN): 2652 columns = self._parse_csv(self._parse_function_parameter) 2653 self._match_r_paren() if columns else self._retreat(index) 2654 else: 2655 columns = None 2656 2657 if not alias and not columns: 2658 return None 2659 2660 return self.expression(exp.TableAlias, this=alias, columns=columns) 2661 2662 def _parse_subquery( 2663 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2664 ) -> t.Optional[exp.Subquery]: 2665 if not this: 2666 return None 2667 2668 return self.expression( 2669 exp.Subquery, 2670 this=this, 2671 pivots=self._parse_pivots(), 2672 alias=self._parse_table_alias() if parse_alias else None, 2673 ) 2674 2675 def _implicit_unnests_to_explicit(self, this: E) -> E: 2676 from sqlglot.optimizer.normalize_identifiers import ( 2677 normalize_identifiers as _norm, 2678 ) 2679 2680 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2681 for i, join in enumerate(this.args.get("joins") or []): 2682 table = join.this 2683 normalized_table = table.copy() 2684 normalized_table.meta["maybe_column"] = True 2685 normalized_table = _norm(normalized_table, dialect=self.dialect) 2686 2687 if isinstance(table, exp.Table) and not join.args.get("on"): 2688 if normalized_table.parts[0].name in refs: 2689 table_as_column = table.to_column() 2690 unnest = exp.Unnest(expressions=[table_as_column]) 2691 2692 # Table.to_column creates a parent Alias node that we want to convert to 2693 # a TableAlias and attach to the Unnest, so it matches the parser's output 2694 if isinstance(table.args.get("alias"), exp.TableAlias): 2695 table_as_column.replace(table_as_column.this) 2696 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2697 2698 table.replace(unnest) 2699 2700 refs.add(normalized_table.alias_or_name) 2701 2702 return this 2703 2704 def _parse_query_modifiers( 2705 self, this: t.Optional[exp.Expression] 2706 ) -> t.Optional[exp.Expression]: 2707 if isinstance(this, (exp.Query, exp.Table)): 2708 for join in self._parse_joins(): 2709 this.append("joins", join) 2710 for lateral in iter(self._parse_lateral, None): 2711 this.append("laterals", lateral) 2712 2713 while True: 2714 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2715 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2716 key, expression 
= parser(self) 2717 2718 if expression: 2719 this.set(key, expression) 2720 if key == "limit": 2721 offset = expression.args.pop("offset", None) 2722 2723 if offset: 2724 offset = exp.Offset(expression=offset) 2725 this.set("offset", offset) 2726 2727 limit_by_expressions = expression.expressions 2728 expression.set("expressions", None) 2729 offset.set("expressions", limit_by_expressions) 2730 continue 2731 break 2732 2733 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2734 this = self._implicit_unnests_to_explicit(this) 2735 2736 return this 2737 2738 def _parse_hint(self) -> t.Optional[exp.Hint]: 2739 if self._match(TokenType.HINT): 2740 hints = [] 2741 for hint in iter( 2742 lambda: self._parse_csv( 2743 lambda: self._parse_function() or self._parse_var(upper=True) 2744 ), 2745 [], 2746 ): 2747 hints.extend(hint) 2748 2749 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2750 self.raise_error("Expected */ after HINT") 2751 2752 return self.expression(exp.Hint, expressions=hints) 2753 2754 return None 2755 2756 def _parse_into(self) -> t.Optional[exp.Into]: 2757 if not self._match(TokenType.INTO): 2758 return None 2759 2760 temp = self._match(TokenType.TEMPORARY) 2761 unlogged = self._match_text_seq("UNLOGGED") 2762 self._match(TokenType.TABLE) 2763 2764 return self.expression( 2765 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2766 ) 2767 2768 def _parse_from( 2769 self, joins: bool = False, skip_from_token: bool = False 2770 ) -> t.Optional[exp.From]: 2771 if not skip_from_token and not self._match(TokenType.FROM): 2772 return None 2773 2774 return self.expression( 2775 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2776 ) 2777 2778 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2779 return self.expression( 2780 exp.MatchRecognizeMeasure, 2781 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2782 this=self._parse_expression(), 2783 ) 2784 2785 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2786 if not self._match(TokenType.MATCH_RECOGNIZE): 2787 return None 2788 2789 self._match_l_paren() 2790 2791 partition = self._parse_partition_by() 2792 order = self._parse_order() 2793 2794 measures = ( 2795 self._parse_csv(self._parse_match_recognize_measure) 2796 if self._match_text_seq("MEASURES") 2797 else None 2798 ) 2799 2800 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2801 rows = exp.var("ONE ROW PER MATCH") 2802 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2803 text = "ALL ROWS PER MATCH" 2804 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2805 text += " SHOW EMPTY MATCHES" 2806 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2807 text += " OMIT EMPTY MATCHES" 2808 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2809 text += " WITH UNMATCHED ROWS" 2810 rows = exp.var(text) 2811 else: 2812 rows = None 2813 2814 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2815 text = "AFTER MATCH SKIP" 2816 if self._match_text_seq("PAST", "LAST", "ROW"): 2817 text += " PAST LAST ROW" 2818 elif self._match_text_seq("TO", "NEXT", "ROW"): 2819 text += " TO NEXT ROW" 2820 elif self._match_text_seq("TO", "FIRST"): 2821 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2822 elif self._match_text_seq("TO", "LAST"): 2823 text += f" TO LAST {self._advance_any().text}" # type: ignore 2824 after = exp.var(text) 2825 else: 2826 after = None 2827 2828 if self._match_text_seq("PATTERN"): 
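# ---------------------------------------------------------------------------
# Editorial aside (not part of the parser source): the branch below does not
# parse the row-pattern grammar itself; it balances parentheses and keeps the
# raw PATTERN text verbatim in an exp.Var, so generators can re-emit it
# unchanged. A sketch with illustrative SQL, assuming the default dialect's
# MATCH_RECOGNIZE support.
from sqlglot import exp, parse_one

mr = parse_one(
    "SELECT * FROM t MATCH_RECOGNIZE ("
    "PARTITION BY a ORDER BY b PATTERN (A B*) DEFINE A AS price > 10)"
).find(exp.MatchRecognize)
assert mr.args["pattern"].name == "A B*"
# ---------------------------------------------------------------------------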
2829 self._match_l_paren() 2830 2831 if not self._curr: 2832 self.raise_error("Expecting )", self._curr) 2833 2834 paren = 1 2835 start = self._curr 2836 2837 while self._curr and paren > 0: 2838 if self._curr.token_type == TokenType.L_PAREN: 2839 paren += 1 2840 if self._curr.token_type == TokenType.R_PAREN: 2841 paren -= 1 2842 2843 end = self._prev 2844 self._advance() 2845 2846 if paren > 0: 2847 self.raise_error("Expecting )", self._curr) 2848 2849 pattern = exp.var(self._find_sql(start, end)) 2850 else: 2851 pattern = None 2852 2853 define = ( 2854 self._parse_csv(self._parse_name_as_expression) 2855 if self._match_text_seq("DEFINE") 2856 else None 2857 ) 2858 2859 self._match_r_paren() 2860 2861 return self.expression( 2862 exp.MatchRecognize, 2863 partition_by=partition, 2864 order=order, 2865 measures=measures, 2866 rows=rows, 2867 after=after, 2868 pattern=pattern, 2869 define=define, 2870 alias=self._parse_table_alias(), 2871 ) 2872 2873 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2874 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2875 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2876 cross_apply = False 2877 2878 if cross_apply is not None: 2879 this = self._parse_select(table=True) 2880 view = None 2881 outer = None 2882 elif self._match(TokenType.LATERAL): 2883 this = self._parse_select(table=True) 2884 view = self._match(TokenType.VIEW) 2885 outer = self._match(TokenType.OUTER) 2886 else: 2887 return None 2888 2889 if not this: 2890 this = ( 2891 self._parse_unnest() 2892 or self._parse_function() 2893 or self._parse_id_var(any_token=False) 2894 ) 2895 2896 while self._match(TokenType.DOT): 2897 this = exp.Dot( 2898 this=this, 2899 expression=self._parse_function() or self._parse_id_var(any_token=False), 2900 ) 2901 2902 if view: 2903 table = self._parse_id_var(any_token=False) 2904 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2905 table_alias: t.Optional[exp.TableAlias] = self.expression( 2906 exp.TableAlias, this=table, columns=columns 2907 ) 2908 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2909 # We move the alias from the lateral's child node to the lateral itself 2910 table_alias = this.args["alias"].pop() 2911 else: 2912 table_alias = self._parse_table_alias() 2913 2914 return self.expression( 2915 exp.Lateral, 2916 this=this, 2917 view=view, 2918 outer=outer, 2919 alias=table_alias, 2920 cross_apply=cross_apply, 2921 ) 2922 2923 def _parse_join_parts( 2924 self, 2925 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2926 return ( 2927 self._match_set(self.JOIN_METHODS) and self._prev, 2928 self._match_set(self.JOIN_SIDES) and self._prev, 2929 self._match_set(self.JOIN_KINDS) and self._prev, 2930 ) 2931 2932 def _parse_join( 2933 self, skip_join_token: bool = False, parse_bracket: bool = False 2934 ) -> t.Optional[exp.Join]: 2935 if self._match(TokenType.COMMA): 2936 return self.expression(exp.Join, this=self._parse_table()) 2937 2938 index = self._index 2939 method, side, kind = self._parse_join_parts() 2940 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2941 join = self._match(TokenType.JOIN) 2942 2943 if not skip_join_token and not join: 2944 self._retreat(index) 2945 kind = None 2946 method = None 2947 side = None 2948 2949 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2950 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2951 2952 if not skip_join_token 
and not join and not outer_apply and not cross_apply: 2953 return None 2954 2955 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2956 2957 if method: 2958 kwargs["method"] = method.text 2959 if side: 2960 kwargs["side"] = side.text 2961 if kind: 2962 kwargs["kind"] = kind.text 2963 if hint: 2964 kwargs["hint"] = hint 2965 2966 if self._match(TokenType.MATCH_CONDITION): 2967 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2968 2969 if self._match(TokenType.ON): 2970 kwargs["on"] = self._parse_conjunction() 2971 elif self._match(TokenType.USING): 2972 kwargs["using"] = self._parse_wrapped_id_vars() 2973 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2974 kind and kind.token_type == TokenType.CROSS 2975 ): 2976 index = self._index 2977 joins: t.Optional[list] = list(self._parse_joins()) 2978 2979 if joins and self._match(TokenType.ON): 2980 kwargs["on"] = self._parse_conjunction() 2981 elif joins and self._match(TokenType.USING): 2982 kwargs["using"] = self._parse_wrapped_id_vars() 2983 else: 2984 joins = None 2985 self._retreat(index) 2986 2987 kwargs["this"].set("joins", joins if joins else None) 2988 2989 comments = [c for token in (method, side, kind) if token for c in token.comments] 2990 return self.expression(exp.Join, comments=comments, **kwargs) 2991 2992 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2993 this = self._parse_conjunction() 2994 2995 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2996 return this 2997 2998 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2999 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3000 3001 return this 3002 3003 def _parse_index_params(self) -> exp.IndexParameters: 3004 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3005 3006 if self._match(TokenType.L_PAREN, advance=False): 3007 columns = self._parse_wrapped_csv(self._parse_with_operator) 3008 else: 3009 columns = None 3010 3011 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3012 partition_by = self._parse_partition_by() 3013 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3014 tablespace = ( 3015 self._parse_var(any_token=True) 3016 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3017 else None 3018 ) 3019 where = self._parse_where() 3020 3021 return self.expression( 3022 exp.IndexParameters, 3023 using=using, 3024 columns=columns, 3025 include=include, 3026 partition_by=partition_by, 3027 where=where, 3028 with_storage=with_storage, 3029 tablespace=tablespace, 3030 ) 3031 3032 def _parse_index( 3033 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3034 ) -> t.Optional[exp.Index]: 3035 if index or anonymous: 3036 unique = None 3037 primary = None 3038 amp = None 3039 3040 self._match(TokenType.ON) 3041 self._match(TokenType.TABLE) # hive 3042 table = self._parse_table_parts(schema=True) 3043 else: 3044 unique = self._match(TokenType.UNIQUE) 3045 primary = self._match_text_seq("PRIMARY") 3046 amp = self._match_text_seq("AMP") 3047 3048 if not self._match(TokenType.INDEX): 3049 return None 3050 3051 index = self._parse_id_var() 3052 table = None 3053 3054 params = self._parse_index_params() 3055 3056 return self.expression( 3057 exp.Index, 3058 this=index, 3059 table=table, 3060 unique=unique, 3061 primary=primary, 3062 amp=amp, 3063 params=params, 3064 ) 3065 3066 def _parse_table_hints(self) -> 
t.Optional[t.List[exp.Expression]]: 3067 hints: t.List[exp.Expression] = [] 3068 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3069 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3070 hints.append( 3071 self.expression( 3072 exp.WithTableHint, 3073 expressions=self._parse_csv( 3074 lambda: self._parse_function() or self._parse_var(any_token=True) 3075 ), 3076 ) 3077 ) 3078 self._match_r_paren() 3079 else: 3080 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3081 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3082 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3083 3084 self._match_texts(("INDEX", "KEY")) 3085 if self._match(TokenType.FOR): 3086 hint.set("target", self._advance_any() and self._prev.text.upper()) 3087 3088 hint.set("expressions", self._parse_wrapped_id_vars()) 3089 hints.append(hint) 3090 3091 return hints or None 3092 3093 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3094 return ( 3095 (not schema and self._parse_function(optional_parens=False)) 3096 or self._parse_id_var(any_token=False) 3097 or self._parse_string_as_identifier() 3098 or self._parse_placeholder() 3099 ) 3100 3101 def _parse_table_parts( 3102 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3103 ) -> exp.Table: 3104 catalog = None 3105 db = None 3106 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3107 3108 while self._match(TokenType.DOT): 3109 if catalog: 3110 # This allows nesting the table in arbitrarily many dot expressions if needed 3111 table = self.expression( 3112 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3113 ) 3114 else: 3115 catalog = db 3116 db = table 3117 # "" used for tsql FROM a..b case 3118 table = self._parse_table_part(schema=schema) or "" 3119 3120 if ( 3121 wildcard 3122 and self._is_connected() 3123 and (isinstance(table, exp.Identifier) or not table) 3124 and self._match(TokenType.STAR) 3125 ): 3126 if isinstance(table, exp.Identifier): 3127 table.args["this"] += "*" 3128 else: 3129 table = exp.Identifier(this="*") 3130 3131 # We bubble up comments from the Identifier to the Table 3132 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3133 3134 if is_db_reference: 3135 catalog = db 3136 db = table 3137 table = None 3138 3139 if not table and not is_db_reference: 3140 self.raise_error(f"Expected table name but got {self._curr}") 3141 if not db and is_db_reference: 3142 self.raise_error(f"Expected database name but got {self._curr}") 3143 3144 return self.expression( 3145 exp.Table, 3146 comments=comments, 3147 this=table, 3148 db=db, 3149 catalog=catalog, 3150 pivots=self._parse_pivots(), 3151 ) 3152 3153 def _parse_table( 3154 self, 3155 schema: bool = False, 3156 joins: bool = False, 3157 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3158 parse_bracket: bool = False, 3159 is_db_reference: bool = False, 3160 parse_partition: bool = False, 3161 ) -> t.Optional[exp.Expression]: 3162 lateral = self._parse_lateral() 3163 if lateral: 3164 return lateral 3165 3166 unnest = self._parse_unnest() 3167 if unnest: 3168 return unnest 3169 3170 values = self._parse_derived_table_values() 3171 if values: 3172 return values 3173 3174 subquery = self._parse_select(table=True) 3175 if subquery: 3176 if not subquery.args.get("pivots"): 3177 subquery.set("pivots", self._parse_pivots()) 3178 return subquery 3179 3180 bracket = parse_bracket and 
self._parse_bracket(None) 3181 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3182 3183 only = self._match(TokenType.ONLY) 3184 3185 this = t.cast( 3186 exp.Expression, 3187 bracket 3188 or self._parse_bracket( 3189 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3190 ), 3191 ) 3192 3193 if only: 3194 this.set("only", only) 3195 3196 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3197 self._match_text_seq("*") 3198 3199 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3200 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3201 this.set("partition", self._parse_partition()) 3202 3203 if schema: 3204 return self._parse_schema(this=this) 3205 3206 version = self._parse_version() 3207 3208 if version: 3209 this.set("version", version) 3210 3211 if self.dialect.ALIAS_POST_TABLESAMPLE: 3212 table_sample = self._parse_table_sample() 3213 3214 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3215 if alias: 3216 this.set("alias", alias) 3217 3218 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3219 return self.expression( 3220 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3221 ) 3222 3223 this.set("hints", self._parse_table_hints()) 3224 3225 if not this.args.get("pivots"): 3226 this.set("pivots", self._parse_pivots()) 3227 3228 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3229 table_sample = self._parse_table_sample() 3230 3231 if table_sample: 3232 table_sample.set("this", this) 3233 this = table_sample 3234 3235 if joins: 3236 for join in self._parse_joins(): 3237 this.append("joins", join) 3238 3239 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3240 this.set("ordinality", True) 3241 this.set("alias", self._parse_table_alias()) 3242 3243 return this 3244 3245 def _parse_version(self) -> t.Optional[exp.Version]: 3246 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3247 this = "TIMESTAMP" 3248 elif self._match(TokenType.VERSION_SNAPSHOT): 3249 this = "VERSION" 3250 else: 3251 return None 3252 3253 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3254 kind = self._prev.text.upper() 3255 start = self._parse_bitwise() 3256 self._match_texts(("TO", "AND")) 3257 end = self._parse_bitwise() 3258 expression: t.Optional[exp.Expression] = self.expression( 3259 exp.Tuple, expressions=[start, end] 3260 ) 3261 elif self._match_text_seq("CONTAINED", "IN"): 3262 kind = "CONTAINED IN" 3263 expression = self.expression( 3264 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3265 ) 3266 elif self._match(TokenType.ALL): 3267 kind = "ALL" 3268 expression = None 3269 else: 3270 self._match_text_seq("AS", "OF") 3271 kind = "AS OF" 3272 expression = self._parse_type() 3273 3274 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3275 3276 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3277 if not self._match(TokenType.UNNEST): 3278 return None 3279 3280 expressions = self._parse_wrapped_csv(self._parse_equality) 3281 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3282 3283 alias = self._parse_table_alias() if with_alias else None 3284 3285 if alias: 3286 if self.dialect.UNNEST_COLUMN_ONLY: 3287 if alias.args.get("columns"): 3288 self.raise_error("Unexpected extra column alias in unnest.") 3289 3290 alias.set("columns", [alias.this]) 3291 alias.set("this", None) 3292 3293 columns = 
alias.args.get("columns") or [] 3294 if offset and len(expressions) < len(columns): 3295 offset = columns.pop() 3296 3297 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3298 self._match(TokenType.ALIAS) 3299 offset = self._parse_id_var( 3300 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3301 ) or exp.to_identifier("offset") 3302 3303 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3304 3305 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3306 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3307 if not is_derived and not self._match_text_seq("VALUES"): 3308 return None 3309 3310 expressions = self._parse_csv(self._parse_value) 3311 alias = self._parse_table_alias() 3312 3313 if is_derived: 3314 self._match_r_paren() 3315 3316 return self.expression( 3317 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3318 ) 3319 3320 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3321 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3322 as_modifier and self._match_text_seq("USING", "SAMPLE") 3323 ): 3324 return None 3325 3326 bucket_numerator = None 3327 bucket_denominator = None 3328 bucket_field = None 3329 percent = None 3330 size = None 3331 seed = None 3332 3333 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3334 matched_l_paren = self._match(TokenType.L_PAREN) 3335 3336 if self.TABLESAMPLE_CSV: 3337 num = None 3338 expressions = self._parse_csv(self._parse_primary) 3339 else: 3340 expressions = None 3341 num = ( 3342 self._parse_factor() 3343 if self._match(TokenType.NUMBER, advance=False) 3344 else self._parse_primary() or self._parse_placeholder() 3345 ) 3346 3347 if self._match_text_seq("BUCKET"): 3348 bucket_numerator = self._parse_number() 3349 self._match_text_seq("OUT", "OF") 3350 bucket_denominator = self._parse_number() 3351 self._match(TokenType.ON) 3352 bucket_field = self._parse_field() 3353 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3354 percent = num 3355 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3356 size = num 3357 else: 3358 percent = num 3359 3360 if matched_l_paren: 3361 self._match_r_paren() 3362 3363 if self._match(TokenType.L_PAREN): 3364 method = self._parse_var(upper=True) 3365 seed = self._match(TokenType.COMMA) and self._parse_number() 3366 self._match_r_paren() 3367 elif self._match_texts(("SEED", "REPEATABLE")): 3368 seed = self._parse_wrapped(self._parse_number) 3369 3370 return self.expression( 3371 exp.TableSample, 3372 expressions=expressions, 3373 method=method, 3374 bucket_numerator=bucket_numerator, 3375 bucket_denominator=bucket_denominator, 3376 bucket_field=bucket_field, 3377 percent=percent, 3378 size=size, 3379 seed=seed, 3380 ) 3381 3382 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3383 return list(iter(self._parse_pivot, None)) or None 3384 3385 def _parse_joins(self) -> t.Iterator[exp.Join]: 3386 return iter(self._parse_join, None) 3387 3388 # https://duckdb.org/docs/sql/statements/pivot 3389 def _parse_simplified_pivot(self) -> exp.Pivot: 3390 def _parse_on() -> t.Optional[exp.Expression]: 3391 this = self._parse_bitwise() 3392 return self._parse_in(this) if self._match(TokenType.IN) else this 3393 3394 this = self._parse_table() 3395 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3396 using = self._match(TokenType.USING) and self._parse_csv( 3397
lambda: self._parse_alias(self._parse_function()) 3398 ) 3399 group = self._parse_group() 3400 return self.expression( 3401 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3402 ) 3403 3404 def _parse_pivot_in(self) -> exp.In: 3405 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3406 this = self._parse_conjunction() 3407 3408 self._match(TokenType.ALIAS) 3409 alias = self._parse_field() 3410 if alias: 3411 return self.expression(exp.PivotAlias, this=this, alias=alias) 3412 3413 return this 3414 3415 value = self._parse_column() 3416 3417 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3418 self.raise_error("Expecting IN (") 3419 3420 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3421 3422 self._match_r_paren() 3423 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3424 3425 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3426 index = self._index 3427 include_nulls = None 3428 3429 if self._match(TokenType.PIVOT): 3430 unpivot = False 3431 elif self._match(TokenType.UNPIVOT): 3432 unpivot = True 3433 3434 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3435 if self._match_text_seq("INCLUDE", "NULLS"): 3436 include_nulls = True 3437 elif self._match_text_seq("EXCLUDE", "NULLS"): 3438 include_nulls = False 3439 else: 3440 return None 3441 3442 expressions = [] 3443 3444 if not self._match(TokenType.L_PAREN): 3445 self._retreat(index) 3446 return None 3447 3448 if unpivot: 3449 expressions = self._parse_csv(self._parse_column) 3450 else: 3451 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3452 3453 if not expressions: 3454 self.raise_error("Failed to parse PIVOT's aggregation list") 3455 3456 if not self._match(TokenType.FOR): 3457 self.raise_error("Expecting FOR") 3458 3459 field = self._parse_pivot_in() 3460 3461 self._match_r_paren() 3462 3463 pivot = self.expression( 3464 exp.Pivot, 3465 expressions=expressions, 3466 field=field, 3467 unpivot=unpivot, 3468 include_nulls=include_nulls, 3469 ) 3470 3471 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3472 pivot.set("alias", self._parse_table_alias()) 3473 3474 if not unpivot: 3475 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3476 3477 columns: t.List[exp.Expression] = [] 3478 for fld in pivot.args["field"].expressions: 3479 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3480 for name in names: 3481 if self.PREFIXED_PIVOT_COLUMNS: 3482 name = f"{name}_{field_name}" if name else field_name 3483 else: 3484 name = f"{field_name}_{name}" if name else field_name 3485 3486 columns.append(exp.to_identifier(name)) 3487 3488 pivot.set("columns", columns) 3489 3490 return pivot 3491 3492 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3493 return [agg.alias for agg in aggregations] 3494 3495 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3496 if not skip_where_token and not self._match(TokenType.PREWHERE): 3497 return None 3498 3499 return self.expression( 3500 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3501 ) 3502 3503 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3504 if not skip_where_token and not self._match(TokenType.WHERE): 3505 return None 3506 3507 return self.expression( 3508 exp.Where, comments=self._prev_comments, 
this=self._parse_conjunction() 3509 ) 3510 3511 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3512 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3513 return None 3514 3515 elements: t.Dict[str, t.Any] = defaultdict(list) 3516 3517 if self._match(TokenType.ALL): 3518 elements["all"] = True 3519 elif self._match(TokenType.DISTINCT): 3520 elements["all"] = False 3521 3522 while True: 3523 expressions = self._parse_csv(self._parse_conjunction) 3524 if expressions: 3525 elements["expressions"].extend(expressions) 3526 3527 grouping_sets = self._parse_grouping_sets() 3528 if grouping_sets: 3529 elements["grouping_sets"].extend(grouping_sets) 3530 3531 rollup = None 3532 cube = None 3533 totals = None 3534 3535 index = self._index 3536 with_ = self._match(TokenType.WITH) 3537 if self._match(TokenType.ROLLUP): 3538 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3539 elements["rollup"].extend(ensure_list(rollup)) 3540 3541 if self._match(TokenType.CUBE): 3542 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3543 elements["cube"].extend(ensure_list(cube)) 3544 3545 if self._match_text_seq("TOTALS"): 3546 totals = True 3547 elements["totals"] = True # type: ignore 3548 3549 if not (grouping_sets or rollup or cube or totals): 3550 if with_: 3551 self._retreat(index) 3552 break 3553 3554 return self.expression(exp.Group, **elements) # type: ignore 3555 3556 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3557 if not self._match(TokenType.GROUPING_SETS): 3558 return None 3559 3560 return self._parse_wrapped_csv(self._parse_grouping_set) 3561 3562 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3563 if self._match(TokenType.L_PAREN): 3564 grouping_set = self._parse_csv(self._parse_column) 3565 self._match_r_paren() 3566 return self.expression(exp.Tuple, expressions=grouping_set) 3567 3568 return self._parse_column() 3569 3570 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3571 if not skip_having_token and not self._match(TokenType.HAVING): 3572 return None 3573 return self.expression(exp.Having, this=self._parse_conjunction()) 3574 3575 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3576 if not self._match(TokenType.QUALIFY): 3577 return None 3578 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3579 3580 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3581 if skip_start_token: 3582 start = None 3583 elif self._match(TokenType.START_WITH): 3584 start = self._parse_conjunction() 3585 else: 3586 return None 3587 3588 self._match(TokenType.CONNECT_BY) 3589 nocycle = self._match_text_seq("NOCYCLE") 3590 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3591 exp.Prior, this=self._parse_bitwise() 3592 ) 3593 connect = self._parse_conjunction() 3594 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3595 3596 if not start and self._match(TokenType.START_WITH): 3597 start = self._parse_conjunction() 3598 3599 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3600 3601 def _parse_name_as_expression(self) -> exp.Alias: 3602 return self.expression( 3603 exp.Alias, 3604 alias=self._parse_id_var(any_token=True), 3605 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3606 ) 3607 3608 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3609 if self._match_text_seq("INTERPOLATE"): 3610 return 
self._parse_wrapped_csv(self._parse_name_as_expression) 3611 return None 3612 3613 def _parse_order( 3614 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3615 ) -> t.Optional[exp.Expression]: 3616 siblings = None 3617 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3618 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3619 return this 3620 3621 siblings = True 3622 3623 return self.expression( 3624 exp.Order, 3625 this=this, 3626 expressions=self._parse_csv(self._parse_ordered), 3627 interpolate=self._parse_interpolate(), 3628 siblings=siblings, 3629 ) 3630 3631 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3632 if not self._match(token): 3633 return None 3634 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3635 3636 def _parse_ordered( 3637 self, parse_method: t.Optional[t.Callable] = None 3638 ) -> t.Optional[exp.Ordered]: 3639 this = parse_method() if parse_method else self._parse_conjunction() 3640 if not this: 3641 return None 3642 3643 asc = self._match(TokenType.ASC) 3644 desc = self._match(TokenType.DESC) or (asc and False) 3645 3646 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3647 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3648 3649 nulls_first = is_nulls_first or False 3650 explicitly_null_ordered = is_nulls_first or is_nulls_last 3651 3652 if ( 3653 not explicitly_null_ordered 3654 and ( 3655 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3656 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3657 ) 3658 and self.dialect.NULL_ORDERING != "nulls_are_last" 3659 ): 3660 nulls_first = True 3661 3662 if self._match_text_seq("WITH", "FILL"): 3663 with_fill = self.expression( 3664 exp.WithFill, 3665 **{ # type: ignore 3666 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3667 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3668 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3669 }, 3670 ) 3671 else: 3672 with_fill = None 3673 3674 return self.expression( 3675 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3676 ) 3677 3678 def _parse_limit( 3679 self, 3680 this: t.Optional[exp.Expression] = None, 3681 top: bool = False, 3682 skip_limit_token: bool = False, 3683 ) -> t.Optional[exp.Expression]: 3684 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3685 comments = self._prev_comments 3686 if top: 3687 limit_paren = self._match(TokenType.L_PAREN) 3688 expression = self._parse_term() if limit_paren else self._parse_number() 3689 3690 if limit_paren: 3691 self._match_r_paren() 3692 else: 3693 expression = self._parse_term() 3694 3695 if self._match(TokenType.COMMA): 3696 offset = expression 3697 expression = self._parse_term() 3698 else: 3699 offset = None 3700 3701 limit_exp = self.expression( 3702 exp.Limit, 3703 this=this, 3704 expression=expression, 3705 offset=offset, 3706 comments=comments, 3707 expressions=self._parse_limit_by(), 3708 ) 3709 3710 return limit_exp 3711 3712 if self._match(TokenType.FETCH): 3713 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3714 direction = self._prev.text.upper() if direction else "FIRST" 3715 3716 count = self._parse_field(tokens=self.FETCH_TOKENS) 3717 percent = self._match(TokenType.PERCENT) 3718 3719 self._match_set((TokenType.ROW, TokenType.ROWS)) 3720 3721 only = self._match_text_seq("ONLY") 3722 with_ties = self._match_text_seq("WITH", "TIES") 3723 3724 if 
only and with_ties: 3725 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3726 3727 return self.expression( 3728 exp.Fetch, 3729 direction=direction, 3730 count=count, 3731 percent=percent, 3732 with_ties=with_ties, 3733 ) 3734 3735 return this 3736 3737 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3738 if not self._match(TokenType.OFFSET): 3739 return this 3740 3741 count = self._parse_term() 3742 self._match_set((TokenType.ROW, TokenType.ROWS)) 3743 3744 return self.expression( 3745 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3746 ) 3747 3748 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3749 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3750 3751 def _parse_locks(self) -> t.List[exp.Lock]: 3752 locks = [] 3753 while True: 3754 if self._match_text_seq("FOR", "UPDATE"): 3755 update = True 3756 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3757 "LOCK", "IN", "SHARE", "MODE" 3758 ): 3759 update = False 3760 else: 3761 break 3762 3763 expressions = None 3764 if self._match_text_seq("OF"): 3765 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3766 3767 wait: t.Optional[bool | exp.Expression] = None 3768 if self._match_text_seq("NOWAIT"): 3769 wait = True 3770 elif self._match_text_seq("WAIT"): 3771 wait = self._parse_primary() 3772 elif self._match_text_seq("SKIP", "LOCKED"): 3773 wait = False 3774 3775 locks.append( 3776 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3777 ) 3778 3779 return locks 3780 3781 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3782 while this and self._match_set(self.SET_OPERATIONS): 3783 token_type = self._prev.token_type 3784 3785 if token_type == TokenType.UNION: 3786 operation = exp.Union 3787 elif token_type == TokenType.EXCEPT: 3788 operation = exp.Except 3789 else: 3790 operation = exp.Intersect 3791 3792 comments = self._prev.comments 3793 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3794 by_name = self._match_text_seq("BY", "NAME") 3795 expression = self._parse_select(nested=True, parse_set_operation=False) 3796 3797 this = self.expression( 3798 operation, 3799 comments=comments, 3800 this=this, 3801 distinct=distinct, 3802 by_name=by_name, 3803 expression=expression, 3804 ) 3805 3806 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3807 expression = this.expression 3808 3809 if expression: 3810 for arg in self.UNION_MODIFIERS: 3811 expr = expression.args.get(arg) 3812 if expr: 3813 this.set(arg, expr.pop()) 3814 3815 return this 3816 3817 def _parse_expression(self) -> t.Optional[exp.Expression]: 3818 return self._parse_alias(self._parse_conjunction()) 3819 3820 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3821 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3822 3823 def _parse_equality(self) -> t.Optional[exp.Expression]: 3824 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3825 3826 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3827 return self._parse_tokens(self._parse_range, self.COMPARISON) 3828 3829 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3830 this = this or self._parse_bitwise() 3831 negate = self._match(TokenType.NOT) 3832 3833 if self._match_set(self.RANGE_PARSERS): 3834 expression = 
self.RANGE_PARSERS[self._prev.token_type](self, this) 3835 if not expression: 3836 return this 3837 3838 this = expression 3839 elif self._match(TokenType.ISNULL): 3840 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3841 3842 # Postgres supports ISNULL and NOTNULL for conditions. 3843 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3844 if self._match(TokenType.NOTNULL): 3845 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3846 this = self.expression(exp.Not, this=this) 3847 3848 if negate: 3849 this = self.expression(exp.Not, this=this) 3850 3851 if self._match(TokenType.IS): 3852 this = self._parse_is(this) 3853 3854 return this 3855 3856 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3857 index = self._index - 1 3858 negate = self._match(TokenType.NOT) 3859 3860 if self._match_text_seq("DISTINCT", "FROM"): 3861 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3862 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3863 3864 expression = self._parse_null() or self._parse_boolean() 3865 if not expression: 3866 self._retreat(index) 3867 return None 3868 3869 this = self.expression(exp.Is, this=this, expression=expression) 3870 return self.expression(exp.Not, this=this) if negate else this 3871 3872 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3873 unnest = self._parse_unnest(with_alias=False) 3874 if unnest: 3875 this = self.expression(exp.In, this=this, unnest=unnest) 3876 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3877 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3878 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3879 3880 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3881 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3882 else: 3883 this = self.expression(exp.In, this=this, expressions=expressions) 3884 3885 if matched_l_paren: 3886 self._match_r_paren(this) 3887 elif not self._match(TokenType.R_BRACKET, expression=this): 3888 self.raise_error("Expecting ]") 3889 else: 3890 this = self.expression(exp.In, this=this, field=self._parse_field()) 3891 3892 return this 3893 3894 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3895 low = self._parse_bitwise() 3896 self._match(TokenType.AND) 3897 high = self._parse_bitwise() 3898 return self.expression(exp.Between, this=this, low=low, high=high) 3899 3900 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3901 if not self._match(TokenType.ESCAPE): 3902 return this 3903 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3904 3905 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3906 index = self._index 3907 3908 if not self._match(TokenType.INTERVAL) and match_interval: 3909 return None 3910 3911 if self._match(TokenType.STRING, advance=False): 3912 this = self._parse_primary() 3913 else: 3914 this = self._parse_term() 3915 3916 if not this or ( 3917 isinstance(this, exp.Column) 3918 and not this.table 3919 and not this.this.quoted 3920 and this.name.upper() == "IS" 3921 ): 3922 self._retreat(index) 3923 return None 3924 3925 unit = self._parse_function() or ( 3926 not self._match(TokenType.ALIAS, advance=False) 3927 and self._parse_var(any_token=True, upper=True) 3928 ) 3929 3930 # Most dialects support, e.g., the form INTERVAL 
'5' day, thus we try to parse 3931 # each INTERVAL expression into this canonical form so it's easy to transpile 3932 if this and this.is_number: 3933 this = exp.Literal.string(this.name) 3934 elif this and this.is_string: 3935 parts = this.name.split() 3936 3937 if len(parts) == 2: 3938 if unit: 3939 # This is not actually a unit, it's something else (e.g. a "window side") 3940 unit = None 3941 self._retreat(self._index - 1) 3942 3943 this = exp.Literal.string(parts[0]) 3944 unit = self.expression(exp.Var, this=parts[1].upper()) 3945 3946 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3947 unit = self.expression( 3948 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3949 ) 3950 3951 return self.expression(exp.Interval, this=this, unit=unit) 3952 3953 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3954 this = self._parse_term() 3955 3956 while True: 3957 if self._match_set(self.BITWISE): 3958 this = self.expression( 3959 self.BITWISE[self._prev.token_type], 3960 this=this, 3961 expression=self._parse_term(), 3962 ) 3963 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3964 this = self.expression( 3965 exp.DPipe, 3966 this=this, 3967 expression=self._parse_term(), 3968 safe=not self.dialect.STRICT_STRING_CONCAT, 3969 ) 3970 elif self._match(TokenType.DQMARK): 3971 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3972 elif self._match_pair(TokenType.LT, TokenType.LT): 3973 this = self.expression( 3974 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3975 ) 3976 elif self._match_pair(TokenType.GT, TokenType.GT): 3977 this = self.expression( 3978 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3979 ) 3980 else: 3981 break 3982 3983 return this 3984 3985 def _parse_term(self) -> t.Optional[exp.Expression]: 3986 return self._parse_tokens(self._parse_factor, self.TERM) 3987 3988 def _parse_factor(self) -> t.Optional[exp.Expression]: 3989 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3990 this = parse_method() 3991 3992 while self._match_set(self.FACTOR): 3993 this = self.expression( 3994 self.FACTOR[self._prev.token_type], 3995 this=this, 3996 comments=self._prev_comments, 3997 expression=parse_method(), 3998 ) 3999 if isinstance(this, exp.Div): 4000 this.args["typed"] = self.dialect.TYPED_DIVISION 4001 this.args["safe"] = self.dialect.SAFE_DIVISION 4002 4003 return this 4004 4005 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4006 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4007 4008 def _parse_unary(self) -> t.Optional[exp.Expression]: 4009 if self._match_set(self.UNARY_PARSERS): 4010 return self.UNARY_PARSERS[self._prev.token_type](self) 4011 return self._parse_at_time_zone(self._parse_type()) 4012 4013 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4014 interval = parse_interval and self._parse_interval() 4015 if interval: 4016 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4017 while True: 4018 index = self._index 4019 self._match(TokenType.PLUS) 4020 4021 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4022 self._retreat(index) 4023 break 4024 4025 interval = self.expression( # type: ignore 4026 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4027 ) 4028 4029 return interval 4030 4031 index = self._index 4032 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4033 this = self._parse_column() 4034 4035 if data_type: 4036 if isinstance(this, exp.Literal): 4037 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4038 if parser: 4039 return parser(self, this, data_type) 4040 return self.expression(exp.Cast, this=this, to=data_type) 4041 if not data_type.expressions: 4042 self._retreat(index) 4043 return self._parse_column() 4044 return self._parse_column_ops(data_type) 4045 4046 return this and self._parse_column_ops(this) 4047 4048 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4049 this = self._parse_type() 4050 if not this: 4051 return None 4052 4053 if isinstance(this, exp.Column) and not this.table: 4054 this = exp.var(this.name.upper()) 4055 4056 return self.expression( 4057 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4058 ) 4059 4060 def _parse_types( 4061 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4062 ) -> t.Optional[exp.Expression]: 4063 index = self._index 4064 4065 prefix = self._match_text_seq("SYSUDTLIB", ".") 4066 4067 if not self._match_set(self.TYPE_TOKENS): 4068 identifier = allow_identifiers and self._parse_id_var( 4069 any_token=False, tokens=(TokenType.VAR,) 4070 ) 4071 if identifier: 4072 tokens = self.dialect.tokenize(identifier.name) 4073 4074 if len(tokens) != 1: 4075 self.raise_error("Unexpected identifier", self._prev) 4076 4077 if tokens[0].token_type in self.TYPE_TOKENS: 4078 self._prev = tokens[0] 4079 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4080 type_name = identifier.name 4081 4082 while self._match(TokenType.DOT): 4083 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4084 4085 return exp.DataType.build(type_name, udt=True) 4086 else: 4087 self._retreat(self._index - 1) 4088 return None 4089 else: 4090 return None 4091 4092 type_token = self._prev.token_type 4093 4094 if type_token == TokenType.PSEUDO_TYPE: 4095 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4096 4097 if type_token == TokenType.OBJECT_IDENTIFIER: 4098 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4099 4100 nested = type_token in self.NESTED_TYPE_TOKENS 4101 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4102 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4103 expressions = None 4104 maybe_func = False 4105 4106 if self._match(TokenType.L_PAREN): 4107 if is_struct: 4108 expressions = self._parse_csv(self._parse_struct_types) 4109 elif nested: 4110 expressions = self._parse_csv( 4111 lambda: self._parse_types( 4112 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4113 ) 4114 ) 4115 elif type_token in self.ENUM_TYPE_TOKENS: 4116 expressions = self._parse_csv(self._parse_equality) 4117 elif is_aggregate: 4118 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4119 any_token=False, tokens=(TokenType.VAR,) 4120 ) 4121 if not func_or_ident or not self._match(TokenType.COMMA): 4122 return None 4123 expressions = 
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def
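    # Usage sketch (hedged): the trailing-bracket loop at the end of
    # _parse_types builds nested ARRAY types out of postfix "[]" syntax, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CAST(x AS INT[])", read="duckdb").sql(dialect="duckdb")
    #     'CAST(x AS INT[])'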
self._match_text_seq("AT", "TIME", "ZONE"): 4229 return this 4230 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4231 4232 def _parse_column(self) -> t.Optional[exp.Expression]: 4233 this = self._parse_column_reference() 4234 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4235 4236 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4237 this = self._parse_field() 4238 if ( 4239 not this 4240 and self._match(TokenType.VALUES, advance=False) 4241 and self.VALUES_FOLLOWED_BY_PAREN 4242 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4243 ): 4244 this = self._parse_id_var() 4245 4246 if isinstance(this, exp.Identifier): 4247 # We bubble up comments from the Identifier to the Column 4248 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4249 4250 return this 4251 4252 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4253 this = self._parse_bracket(this) 4254 4255 while self._match_set(self.COLUMN_OPERATORS): 4256 op_token = self._prev.token_type 4257 op = self.COLUMN_OPERATORS.get(op_token) 4258 4259 if op_token == TokenType.DCOLON: 4260 field = self._parse_types() 4261 if not field: 4262 self.raise_error("Expected type") 4263 elif op and self._curr: 4264 field = self._parse_column_reference() 4265 else: 4266 field = self._parse_field(any_token=True, anonymous_func=True) 4267 4268 if isinstance(field, exp.Func) and this: 4269 # bigquery allows function calls like x.y.count(...) 4270 # SAFE.SUBSTR(...) 4271 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4272 this = exp.replace_tree( 4273 this, 4274 lambda n: ( 4275 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4276 if n.table 4277 else n.this 4278 ) 4279 if isinstance(n, exp.Column) 4280 else n, 4281 ) 4282 4283 if op: 4284 this = op(self, this, field) 4285 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4286 this = self.expression( 4287 exp.Column, 4288 this=field, 4289 table=this.this, 4290 db=this.args.get("table"), 4291 catalog=this.args.get("db"), 4292 ) 4293 else: 4294 this = self.expression(exp.Dot, this=this, expression=field) 4295 this = self._parse_bracket(this) 4296 return this 4297 4298 def _parse_primary(self) -> t.Optional[exp.Expression]: 4299 if self._match_set(self.PRIMARY_PARSERS): 4300 token_type = self._prev.token_type 4301 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4302 4303 if token_type == TokenType.STRING: 4304 expressions = [primary] 4305 while self._match(TokenType.STRING): 4306 expressions.append(exp.Literal.string(self._prev.text)) 4307 4308 if len(expressions) > 1: 4309 return self.expression(exp.Concat, expressions=expressions) 4310 4311 return primary 4312 4313 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4314 return exp.Literal.number(f"0.{self._prev.text}") 4315 4316 if self._match(TokenType.L_PAREN): 4317 comments = self._prev_comments 4318 query = self._parse_select() 4319 4320 if query: 4321 expressions = [query] 4322 else: 4323 expressions = self._parse_expressions() 4324 4325 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4326 4327 if isinstance(this, exp.UNWRAPPED_QUERIES): 4328 this = self._parse_set_operations( 4329 self._parse_subquery(this=this, parse_alias=False) 4330 ) 4331 elif isinstance(this, exp.Subquery): 4332 this = self._parse_subquery( 4333 this=self._parse_set_operations(this), parse_alias=False 
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
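    # Usage sketch (hedged; behavior assumed from the branch above): the
    # {fn ...} escape is unwrapped during parsing, so no trace of it remains
    # in the AST, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}").sql()
    #     "SELECT CONCAT('a', 'b')"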
"dialect" in function.__code__.co_varnames: 4454 func = function(args, dialect=self.dialect) 4455 else: 4456 func = function(args) 4457 4458 func = self.validate_expression(func, args) 4459 if not self.dialect.NORMALIZE_FUNCTIONS: 4460 func.meta["name"] = this 4461 4462 this = func 4463 else: 4464 if token_type == TokenType.IDENTIFIER: 4465 this = exp.Identifier(this=this, quoted=True) 4466 this = self.expression(exp.Anonymous, this=this, expressions=args) 4467 4468 if isinstance(this, exp.Expression): 4469 this.add_comments(comments) 4470 4471 self._match_r_paren(this) 4472 return self._parse_window(this) 4473 4474 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4475 transformed = [] 4476 4477 for e in expressions: 4478 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4479 if isinstance(e, exp.Alias): 4480 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4481 4482 if not isinstance(e, exp.PropertyEQ): 4483 e = self.expression( 4484 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4485 ) 4486 4487 if isinstance(e.this, exp.Column): 4488 e.this.replace(e.this.this) 4489 4490 transformed.append(e) 4491 4492 return transformed 4493 4494 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4495 return self._parse_column_def(self._parse_id_var()) 4496 4497 def _parse_user_defined_function( 4498 self, kind: t.Optional[TokenType] = None 4499 ) -> t.Optional[exp.Expression]: 4500 this = self._parse_id_var() 4501 4502 while self._match(TokenType.DOT): 4503 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4504 4505 if not self._match(TokenType.L_PAREN): 4506 return this 4507 4508 expressions = self._parse_csv(self._parse_function_parameter) 4509 self._match_r_paren() 4510 return self.expression( 4511 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4512 ) 4513 4514 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4515 literal = self._parse_primary() 4516 if literal: 4517 return self.expression(exp.Introducer, this=token.text, expression=literal) 4518 4519 return self.expression(exp.Identifier, this=token.text) 4520 4521 def _parse_session_parameter(self) -> exp.SessionParameter: 4522 kind = None 4523 this = self._parse_id_var() or self._parse_primary() 4524 4525 if this and self._match(TokenType.DOT): 4526 kind = this.name 4527 this = self._parse_var() or self._parse_primary() 4528 4529 return self.expression(exp.SessionParameter, this=this, kind=kind) 4530 4531 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4532 index = self._index 4533 4534 if self._match(TokenType.L_PAREN): 4535 expressions = t.cast( 4536 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4537 ) 4538 4539 if not self._match(TokenType.R_PAREN): 4540 self._retreat(index) 4541 else: 4542 expressions = [self._parse_id_var()] 4543 4544 if self._match_set(self.LAMBDAS): 4545 return self.LAMBDAS[self._prev.token_type](self, expressions) 4546 4547 self._retreat(index) 4548 4549 this: t.Optional[exp.Expression] 4550 4551 if self._match(TokenType.DISTINCT): 4552 this = self.expression( 4553 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4554 ) 4555 else: 4556 this = self._parse_select_or_expression(alias=alias) 4557 4558 return self._parse_limit( 4559 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4560 ) 4561 4562 def _parse_schema(self, this: 
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints
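    # Usage sketch (hedged): named and unnamed schema constraints round-trip
    # through _parse_constraint / _parse_unnamed_constraint, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (x INT, CONSTRAINT ck CHECK (x > 0))").sql()
    #     'CREATE TABLE t (x INT, CONSTRAINT ck CHECK (x > 0))'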
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )
    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
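    # Usage sketch (hedged): _parse_case builds an exp.Case with one exp.If
    # per WHEN branch and an optional default, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END").sql()
    #     "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END"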
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
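    # Usage sketch (hedged): _parse_cast is shared by CAST and TRY_CAST; the
    # strict flag selects between exp.Cast and exp.TryCast, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT TRY_CAST(x AS INT)").sql()
    #     'SELECT TRY_CAST(x AS INT)'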
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
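    # Usage sketch (hedged; exact output may vary by version): parsing
    # STRING_AGG into exp.GroupConcat is what makes transpilation to
    # GROUP_CONCAT-style dialects straightforward, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT STRING_AGG(x, ',')", read="postgres", write="mysql")[0]
    #     "SELECT GROUP_CONCAT(x SEPARATOR ',')"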
5099 """ 5100 args = self._parse_csv(self._parse_conjunction) 5101 5102 if len(args) < 3: 5103 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5104 5105 expression, *expressions = args 5106 if not expression: 5107 return None 5108 5109 ifs = [] 5110 for search, result in zip(expressions[::2], expressions[1::2]): 5111 if not search or not result: 5112 return None 5113 5114 if isinstance(search, exp.Literal): 5115 ifs.append( 5116 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5117 ) 5118 elif isinstance(search, exp.Null): 5119 ifs.append( 5120 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5121 ) 5122 else: 5123 cond = exp.or_( 5124 exp.EQ(this=expression.copy(), expression=search), 5125 exp.and_( 5126 exp.Is(this=expression.copy(), expression=exp.Null()), 5127 exp.Is(this=search.copy(), expression=exp.Null()), 5128 copy=False, 5129 ), 5130 copy=False, 5131 ) 5132 ifs.append(exp.If(this=cond, true=result)) 5133 5134 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5135 5136 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5137 self._match_text_seq("KEY") 5138 key = self._parse_column() 5139 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5140 self._match_text_seq("VALUE") 5141 value = self._parse_bitwise() 5142 5143 if not key and not value: 5144 return None 5145 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5146 5147 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5148 if not this or not self._match_text_seq("FORMAT", "JSON"): 5149 return this 5150 5151 return self.expression(exp.FormatJson, this=this) 5152 5153 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5154 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5155 for value in values: 5156 if self._match_text_seq(value, "ON", on): 5157 return f"{value} ON {on}" 5158 5159 return None 5160 5161 @t.overload 5162 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5163 5164 @t.overload 5165 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                if len(args) == 1:
                    args.append(exp.Literal.number(1))
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
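    # Usage sketch (hedged): the FROM/FOR arguments are appended to the
    # positional argument list, so the Postgres form collapses to the common
    # comma-separated form, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT SUBSTRING('foo' FROM 1 FOR 2)").sql()
    #     "SELECT SUBSTRING('foo', 1, 2)"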
    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
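    # Usage sketch (hedged): _parse_window wraps the parsed function in an
    # exp.Window carrying partition, order and frame information, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one(
    #     ...     "SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t"
    #     ... ).sql()
    #     'SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t'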
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None
    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
self._match_texts(("TRANSACTION", "WORK")) 5748 5749 if self._match_text_seq("TO"): 5750 self._match_text_seq("SAVEPOINT") 5751 savepoint = self._parse_id_var() 5752 5753 if self._match(TokenType.AND): 5754 chain = not self._match_text_seq("NO") 5755 self._match_text_seq("CHAIN") 5756 5757 if is_rollback: 5758 return self.expression(exp.Rollback, savepoint=savepoint) 5759 5760 return self.expression(exp.Commit, chain=chain) 5761 5762 def _parse_refresh(self) -> exp.Refresh: 5763 self._match(TokenType.TABLE) 5764 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5765 5766 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5767 if not self._match_text_seq("ADD"): 5768 return None 5769 5770 self._match(TokenType.COLUMN) 5771 exists_column = self._parse_exists(not_=True) 5772 expression = self._parse_field_def() 5773 5774 if expression: 5775 expression.set("exists", exists_column) 5776 5777 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5778 if self._match_texts(("FIRST", "AFTER")): 5779 position = self._prev.text 5780 column_position = self.expression( 5781 exp.ColumnPosition, this=self._parse_column(), position=position 5782 ) 5783 expression.set("position", column_position) 5784 5785 return expression 5786 5787 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5788 drop = self._match(TokenType.DROP) and self._parse_drop() 5789 if drop and not isinstance(drop, exp.Command): 5790 drop.set("kind", drop.args.get("kind", "COLUMN")) 5791 return drop 5792 5793 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5794 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5795 return self.expression( 5796 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5797 ) 5798 5799 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5800 index = self._index - 1 5801 5802 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5803 return self._parse_csv( 5804 lambda: self.expression( 5805 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5806 ) 5807 ) 5808 5809 self._retreat(index) 5810 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5811 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5812 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5813 5814 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5815 self._match(TokenType.COLUMN) 5816 column = self._parse_field(any_token=True) 5817 5818 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5819 return self.expression(exp.AlterColumn, this=column, drop=True) 5820 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5821 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5822 if self._match(TokenType.COMMENT): 5823 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5824 5825 self._match_text_seq("SET", "DATA") 5826 self._match_text_seq("TYPE") 5827 return self.expression( 5828 exp.AlterColumn, 5829 this=column, 5830 dtype=self._parse_types(), 5831 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5832 using=self._match(TokenType.USING) and self._parse_conjunction(), 5833 ) 5834 5835 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5836 index = self._index - 1 5837 5838 partition_exists = self._parse_exists() 5839 if 
self._match(TokenType.PARTITION, advance=False): 5840 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5841 5842 self._retreat(index) 5843 return self._parse_csv(self._parse_drop_column) 5844 5845 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5846 if self._match(TokenType.COLUMN): 5847 exists = self._parse_exists() 5848 old_column = self._parse_column() 5849 to = self._match_text_seq("TO") 5850 new_column = self._parse_column() 5851 5852 if old_column is None or to is None or new_column is None: 5853 return None 5854 5855 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5856 5857 self._match_text_seq("TO") 5858 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5859 5860 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5861 start = self._prev 5862 5863 if not self._match(TokenType.TABLE): 5864 return self._parse_as_command(start) 5865 5866 exists = self._parse_exists() 5867 only = self._match_text_seq("ONLY") 5868 this = self._parse_table(schema=True) 5869 5870 if self._next: 5871 self._advance() 5872 5873 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5874 if parser: 5875 actions = ensure_list(parser(self)) 5876 options = self._parse_csv(self._parse_property) 5877 5878 if not self._curr and actions: 5879 return self.expression( 5880 exp.AlterTable, 5881 this=this, 5882 exists=exists, 5883 actions=actions, 5884 only=only, 5885 options=options, 5886 ) 5887 5888 return self._parse_as_command(start) 5889 5890 def _parse_merge(self) -> exp.Merge: 5891 self._match(TokenType.INTO) 5892 target = self._parse_table() 5893 5894 if target and self._match(TokenType.ALIAS, advance=False): 5895 target.set("alias", self._parse_table_alias()) 5896 5897 self._match(TokenType.USING) 5898 using = self._parse_table() 5899 5900 self._match(TokenType.ON) 5901 on = self._parse_conjunction() 5902 5903 return self.expression( 5904 exp.Merge, 5905 this=target, 5906 using=using, 5907 on=on, 5908 expressions=self._parse_when_matched(), 5909 ) 5910 5911 def _parse_when_matched(self) -> t.List[exp.When]: 5912 whens = [] 5913 5914 while self._match(TokenType.WHEN): 5915 matched = not self._match(TokenType.NOT) 5916 self._match_text_seq("MATCHED") 5917 source = ( 5918 False 5919 if self._match_text_seq("BY", "TARGET") 5920 else self._match_text_seq("BY", "SOURCE") 5921 ) 5922 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5923 5924 self._match(TokenType.THEN) 5925 5926 if self._match(TokenType.INSERT): 5927 _this = self._parse_star() 5928 if _this: 5929 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5930 else: 5931 then = self.expression( 5932 exp.Insert, 5933 this=self._parse_value(), 5934 expression=self._match_text_seq("VALUES") and self._parse_value(), 5935 ) 5936 elif self._match(TokenType.UPDATE): 5937 expressions = self._parse_star() 5938 if expressions: 5939 then = self.expression(exp.Update, expressions=expressions) 5940 else: 5941 then = self.expression( 5942 exp.Update, 5943 expressions=self._match(TokenType.SET) 5944 and self._parse_csv(self._parse_equality), 5945 ) 5946 elif self._match(TokenType.DELETE): 5947 then = self.expression(exp.Var, this=self._prev.text) 5948 else: 5949 then = None 5950 5951 whens.append( 5952 self.expression( 5953 exp.When, 5954 matched=matched, 5955 source=source, 5956 condition=condition, 5957 then=then, 5958 ) 5959 ) 5960 return whens 5961 
5962 def _parse_show(self) -> t.Optional[exp.Expression]: 5963 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5964 if parser: 5965 return parser(self) 5966 return self._parse_as_command(self._prev) 5967 5968 def _parse_set_item_assignment( 5969 self, kind: t.Optional[str] = None 5970 ) -> t.Optional[exp.Expression]: 5971 index = self._index 5972 5973 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5974 return self._parse_set_transaction(global_=kind == "GLOBAL") 5975 5976 left = self._parse_primary() or self._parse_id_var() 5977 assignment_delimiter = self._match_texts(("=", "TO")) 5978 5979 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5980 self._retreat(index) 5981 return None 5982 5983 right = self._parse_statement() or self._parse_id_var() 5984 this = self.expression(exp.EQ, this=left, expression=right) 5985 5986 return self.expression(exp.SetItem, this=this, kind=kind) 5987 5988 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5989 self._match_text_seq("TRANSACTION") 5990 characteristics = self._parse_csv( 5991 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5992 ) 5993 return self.expression( 5994 exp.SetItem, 5995 expressions=characteristics, 5996 kind="TRANSACTION", 5997 **{"global": global_}, # type: ignore 5998 ) 5999 6000 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6001 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6002 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6003 6004 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6005 index = self._index 6006 set_ = self.expression( 6007 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6008 ) 6009 6010 if self._curr: 6011 self._retreat(index) 6012 return self._parse_as_command(self._prev) 6013 6014 return set_ 6015 6016 def _parse_var_from_options( 6017 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6018 ) -> t.Optional[exp.Var]: 6019 start = self._curr 6020 if not start: 6021 return None 6022 6023 option = start.text.upper() 6024 continuations = options.get(option) 6025 6026 index = self._index 6027 self._advance() 6028 for keywords in continuations or []: 6029 if isinstance(keywords, str): 6030 keywords = (keywords,) 6031 6032 if self._match_text_seq(*keywords): 6033 option = f"{option} {' '.join(keywords)}" 6034 break 6035 else: 6036 if continuations or continuations is None: 6037 if raise_unmatched: 6038 self.raise_error(f"Unknown option {option}") 6039 6040 self._retreat(index) 6041 return None 6042 6043 return exp.var(option) 6044 6045 def _parse_as_command(self, start: Token) -> exp.Command: 6046 while self._curr: 6047 self._advance() 6048 text = self._find_sql(start, self._prev) 6049 size = len(start.text) 6050 self._warn_unsupported() 6051 return exp.Command(this=text[:size], expression=text[size:]) 6052 6053 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6054 settings = [] 6055 6056 self._match_l_paren() 6057 kind = self._parse_id_var() 6058 6059 if self._match(TokenType.L_PAREN): 6060 while True: 6061 key = self._parse_id_var() 6062 value = self._parse_primary() 6063 6064 if not key and value is None: 6065 break 6066 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6067 self._match(TokenType.R_PAREN) 6068 6069 self._match_r_paren() 6070 6071 return self.expression( 6072 exp.DictProperty, 6073 this=this, 6074 
kind=kind.this if kind else None, 6075 settings=settings, 6076 ) 6077 6078 def _parse_dict_range(self, this: str) -> exp.DictRange: 6079 self._match_l_paren() 6080 has_min = self._match_text_seq("MIN") 6081 if has_min: 6082 min = self._parse_var() or self._parse_primary() 6083 self._match_text_seq("MAX") 6084 max = self._parse_var() or self._parse_primary() 6085 else: 6086 max = self._parse_var() or self._parse_primary() 6087 min = exp.Literal.number(0) 6088 self._match_r_paren() 6089 return self.expression(exp.DictRange, this=this, min=min, max=max) 6090 6091 def _parse_comprehension( 6092 self, this: t.Optional[exp.Expression] 6093 ) -> t.Optional[exp.Comprehension]: 6094 index = self._index 6095 expression = self._parse_column() 6096 if not self._match(TokenType.IN): 6097 self._retreat(index - 1) 6098 return None 6099 iterator = self._parse_column() 6100 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6101 return self.expression( 6102 exp.Comprehension, 6103 this=this, 6104 expression=expression, 6105 iterator=iterator, 6106 condition=condition, 6107 ) 6108 6109 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6110 if self._match(TokenType.HEREDOC_STRING): 6111 return self.expression(exp.Heredoc, this=self._prev.text) 6112 6113 if not self._match_text_seq("$"): 6114 return None 6115 6116 tags = ["$"] 6117 tag_text = None 6118 6119 if self._is_connected(): 6120 self._advance() 6121 tags.append(self._prev.text.upper()) 6122 else: 6123 self.raise_error("No closing $ found") 6124 6125 if tags[-1] != "$": 6126 if self._is_connected() and self._match_text_seq("$"): 6127 tag_text = tags[-1] 6128 tags.append("$") 6129 else: 6130 self.raise_error("No closing $ found") 6131 6132 heredoc_start = self._curr 6133 6134 while self._curr: 6135 if self._match_text_seq(*tags, advance=False): 6136 this = self._find_sql(heredoc_start, self._prev) 6137 self._advance(len(tags)) 6138 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6139 6140 self._advance() 6141 6142 self.raise_error(f"No closing {''.join(tags)} found") 6143 return None 6144 6145 def _find_parser( 6146 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6147 ) -> t.Optional[t.Callable]: 6148 if not self._curr: 6149 return None 6150 6151 index = self._index 6152 this = [] 6153 while True: 6154 # The current token might be multiple words 6155 curr = self._curr.text.upper() 6156 key = curr.split(" ") 6157 this.append(curr) 6158 6159 self._advance() 6160 result, trie = in_trie(trie, key) 6161 if result == TrieResult.FAILED: 6162 break 6163 6164 if result == TrieResult.EXISTS: 6165 subparser = parsers[" ".join(this)] 6166 return subparser 6167 6168 self._retreat(index) 6169 return None 6170 6171 def _match(self, token_type, advance=True, expression=None): 6172 if not self._curr: 6173 return None 6174 6175 if self._curr.token_type == token_type: 6176 if advance: 6177 self._advance() 6178 self._add_comments(expression) 6179 return True 6180 6181 return None 6182 6183 def _match_set(self, types, advance=True): 6184 if not self._curr: 6185 return None 6186 6187 if self._curr.token_type in types: 6188 if advance: 6189 self._advance() 6190 return True 6191 6192 return None 6193 6194 def _match_pair(self, token_type_a, token_type_b, advance=True): 6195 if not self._curr or not self._next: 6196 return None 6197 6198 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6199 if advance: 6200 self._advance(2) 6201 return True 6202 6203 return None 6204 6205 def _match_l_paren(self, 
expression: t.Optional[exp.Expression] = None) -> None: 6206 if not self._match(TokenType.L_PAREN, expression=expression): 6207 self.raise_error("Expecting (") 6208 6209 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6210 if not self._match(TokenType.R_PAREN, expression=expression): 6211 self.raise_error("Expecting )") 6212 6213 def _match_texts(self, texts, advance=True): 6214 if self._curr and self._curr.text.upper() in texts: 6215 if advance: 6216 self._advance() 6217 return True 6218 return None 6219 6220 def _match_text_seq(self, *texts, advance=True): 6221 index = self._index 6222 for text in texts: 6223 if self._curr and self._curr.text.upper() == text: 6224 self._advance() 6225 else: 6226 self._retreat(index) 6227 return None 6228 6229 if not advance: 6230 self._retreat(index) 6231 6232 return True 6233 6234 def _replace_lambda( 6235 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6236 ) -> t.Optional[exp.Expression]: 6237 if not node: 6238 return node 6239 6240 for column in node.find_all(exp.Column): 6241 if column.parts[0].name in lambda_variables: 6242 dot_or_id = column.to_dot() if column.table else column.this 6243 parent = column.parent 6244 6245 while isinstance(parent, exp.Dot): 6246 if not isinstance(parent.parent, exp.Dot): 6247 parent.replace(dot_or_id) 6248 break 6249 parent = parent.parent 6250 else: 6251 if column is node: 6252 node = dot_or_id 6253 else: 6254 column.replace(dot_or_id) 6255 return node 6256 6257 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6258 start = self._prev 6259 6260 # Not to be confused with TRUNCATE(number, decimals) function call 6261 if self._match(TokenType.L_PAREN): 6262 self._retreat(self._index - 2) 6263 return self._parse_function() 6264 6265 # Clickhouse supports TRUNCATE DATABASE as well 6266 is_database = self._match(TokenType.DATABASE) 6267 6268 self._match(TokenType.TABLE) 6269 6270 exists = self._parse_exists(not_=False) 6271 6272 expressions = self._parse_csv( 6273 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6274 ) 6275 6276 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6277 6278 if self._match_text_seq("RESTART", "IDENTITY"): 6279 identity = "RESTART" 6280 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6281 identity = "CONTINUE" 6282 else: 6283 identity = None 6284 6285 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6286 option = self._prev.text 6287 else: 6288 option = None 6289 6290 partition = self._parse_partition() 6291 6292 # Fallback case 6293 if self._curr: 6294 return self._parse_as_command(start) 6295 6296 return self.expression( 6297 exp.TruncateTable, 6298 expressions=expressions, 6299 is_database=is_database, 6300 exists=exists, 6301 cluster=cluster, 6302 identity=identity, 6303 option=option, 6304 partition=partition, 6305 ) 6306 6307 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6308 this = self._parse_ordered(self._parse_opclass) 6309 6310 if not self._match(TokenType.WITH): 6311 return this 6312 6313 op = self._parse_var(any_token=True) 6314 6315 return self.expression(exp.WithOperator, this=this, op=op) 6316 6317 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6318 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6319 6320 options = [] 6321 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6322 option = self._parse_unquoted_field() 6323 value = None 6324 # Some 
options are defined as functions with the values as params 6325 if not isinstance(option, exp.Func): 6326 # Different dialects might separate options and values by white space, "=" and "AS" 6327 self._match(TokenType.EQ) 6328 self._match(TokenType.ALIAS) 6329 value = self._parse_unquoted_field() 6330 6331 param = self.expression(exp.CopyParameter, this=option, expression=value) 6332 options.append(param) 6333 6334 if sep: 6335 self._match(sep) 6336 6337 return options 6338 6339 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6340 def parse_options(): 6341 opts = [] 6342 self._match(TokenType.EQ) 6343 self._match(TokenType.L_PAREN) 6344 while self._curr and not self._match(TokenType.R_PAREN): 6345 opts.append(self._parse_conjunction()) 6346 return opts 6347 6348 expr = self.expression(exp.Credentials) 6349 6350 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6351 expr.set("storage", self._parse_conjunction()) 6352 if self._match_text_seq("CREDENTIALS"): 6353 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6354 creds = parse_options() if self._match(TokenType.EQ) else self._parse_field() 6355 expr.set("credentials", creds) 6356 if self._match_text_seq("ENCRYPTION"): 6357 expr.set("encryption", parse_options()) 6358 if self._match_text_seq("IAM_ROLE"): 6359 expr.set("iam_role", self._parse_field()) 6360 if self._match_text_seq("REGION"): 6361 expr.set("region", self._parse_field()) 6362 6363 return expr 6364 6365 def _parse_copy(self): 6366 start = self._prev 6367 6368 self._match(TokenType.INTO) 6369 6370 this = ( 6371 self._parse_conjunction() 6372 if self._match(TokenType.L_PAREN, advance=False) 6373 else self._parse_table(schema=True) 6374 ) 6375 6376 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6377 6378 files = self._parse_csv(self._parse_conjunction) 6379 credentials = self._parse_credentials() 6380 6381 self._match_text_seq("WITH") 6382 6383 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6384 6385 # Fallback case 6386 if self._curr: 6387 return self._parse_as_command(start) 6388 6389 return self.expression( 6390 exp.Copy, 6391 this=this, 6392 kind=kind, 6393 credentials=credentials, 6394 files=files, 6395 params=params, 6396 )
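The MERGE machinery above is easiest to observe end to end through the public API. A minimal sketch, using sqlglot's top-level parse_one with the default dialect: _parse_merge produces an exp.Merge whose expressions are the exp.When branches collected by _parse_when_matched.

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)

# The parsed tree is an exp.Merge holding one exp.When per branch.
print(isinstance(tree, exp.Merge))  # True
print(len(tree.expressions))        # 2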
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
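A minimal construction sketch. The public sqlglot.parse/parse_one helpers wrap this, but the Parser can also be driven by hand; the default dialect and base Tokenizer are assumed here.

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t WHERE b > 1"
tokens = Tokenizer().tokenize(sql)

# Accumulate up to 5 errors and raise them together at the end,
# instead of raising immediately on the first one.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
expressions = parser.parse(tokens, sql=sql)
print(expressions[0].sql())  # SELECT a FROM t WHERE b > 1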
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
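For example (a sketch assuming the default dialect), one tree is produced per semicolon-separated statement:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
tokens = Tokenizer().tokenize(sql)

# The returned list has one syntax tree per statement in the input.
for tree in Parser().parse(tokens, sql=sql):
    print(type(tree).__name__, tree.sql())
# Select SELECT 1
# Select SELECT 2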
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
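A sketch of parsing a bare predicate rather than a full statement, assuming exp.Condition is among the registered EXPRESSION_PARSERS keys (it is in current releases):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "a = 1 AND b < 2"
tokens = Tokenizer().tokenize(sql)

# Parse directly into a condition tree; failure raises ParseError.
condition = Parser().parse_into(exp.Condition, tokens, sql=sql)[0]
print(type(condition).__name__)  # And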
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
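An illustrative sketch: with ErrorLevel.WARN, the errors that check_errors() finds are logged rather than raised, and remain inspectable on the instance afterwards. The input below is deliberately malformed (an unbalanced paren triggers an "Expecting )" error).

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT (1"  # unbalanced paren
tokens = Tokenizer().tokenize(sql)

parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(tokens, sql=sql)  # check_errors() logs instead of raising
print(len(parser.errors))      # >= 1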
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
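Under the default ErrorLevel.IMMEDIATE, this raises at the first problem; the ParseError carries the structured fields passed to ParseError.new. A sketch (field names assumed to match the keyword arguments above):

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT (1"
tokens = Tokenizer().tokenize(sql)

try:
    Parser().parse(tokens, sql=sql)  # ErrorLevel.IMMEDIATE by default
except ParseError as e:
    # Each recorded error is a dict with structured context.
    err = e.errors[0]
    print(err["description"], err["line"], err["col"])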
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
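A sketch of building a validated node outside of a parse run (exp.to_identifier is a public helper in sqlglot.expressions):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Equivalent to exp.Column(this=...), plus comment propagation and
# validation against the class's declared mandatory arguments.
column = parser.expression(exp.Column, this=exp.to_identifier("a"))
print(column.sql())  # a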
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
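For instance (a sketch; exp.Like declares both this and expression as mandatory), validating an incomplete node surfaces the missing argument:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

incomplete = exp.Like(this=exp.column("a"))  # `expression` left unset

try:
    Parser().validate_expression(incomplete)  # ErrorLevel.IMMEDIATE raises
except ParseError as e:
    print(e.errors[0]["description"])  # names the missing argument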