# sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, otherwise a VarMap.

    The argument list is interpreted as alternating key, value pairs:
    args[0], args[2], ... are keys and args[1], args[3], ... are values.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Split the flat key/value argument list into its two interleaved halves.
    even_indices = range(0, len(args), 2)
    keys = [args[i] for i in even_indices]
    values = [args[i + 1] for i in even_indices]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a Like expression; wrap it in Escape when a third (escape) argument exists.

    Note the argument order: args[0] is the pattern's subject expression and
    args[1] is the value being matched.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    if len(args) > 2:
        return exp.Escape(this=like, expression=seq_get(args, 2))
    return like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that parses `this <op> <bitwise expr>` into `expr_type`.

    The produced expression is passed through `_parse_escape` so that a trailing
    ESCAPE clause is consumed as well.
    """

    def _parse(self: Parser, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        return self._parse_escape(
            self.expression(expr_type, this=this, expression=self._parse_bitwise())
        )

    return _parse


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log expression, honoring the dialect's argument order and LOG defaults."""
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # Single-argument LOG: some dialects treat it as the natural logarithm.
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)

    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this
    return exp.Log(this=this, expression=expression)
62 63 64def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _builder(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _builder 75 76 77class _Parser(type): 78 def __new__(cls, clsname, bases, attrs): 79 klass = super().__new__(cls, clsname, bases, attrs) 80 81 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 82 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 83 84 return klass 85 86 87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: The amount of context to capture from a query string when displaying 95 the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 
99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": build_like, 123 "LOG": build_logarithm, 124 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 125 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 126 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 127 "TIME_TO_TIME_STR": lambda args: exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 132 this=exp.Cast( 133 this=seq_get(args, 0), 134 to=exp.DataType(this=exp.DataType.Type.TEXT), 135 ), 136 start=exp.Literal.number(1), 137 length=exp.Literal.number(10), 138 ), 139 "VAR_MAP": build_var_map, 140 } 141 142 NO_PAREN_FUNCTIONS = { 143 TokenType.CURRENT_DATE: exp.CurrentDate, 144 TokenType.CURRENT_DATETIME: exp.CurrentDate, 145 TokenType.CURRENT_TIME: exp.CurrentTime, 146 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 147 TokenType.CURRENT_USER: exp.CurrentUser, 148 } 149 150 STRUCT_TYPE_TOKENS = { 151 TokenType.NESTED, 152 TokenType.OBJECT, 153 
TokenType.STRUCT, 154 } 155 156 NESTED_TYPE_TOKENS = { 157 TokenType.ARRAY, 158 TokenType.LOWCARDINALITY, 159 TokenType.MAP, 160 TokenType.NULLABLE, 161 *STRUCT_TYPE_TOKENS, 162 } 163 164 ENUM_TYPE_TOKENS = { 165 TokenType.ENUM, 166 TokenType.ENUM8, 167 TokenType.ENUM16, 168 } 169 170 AGGREGATE_TYPE_TOKENS = { 171 TokenType.AGGREGATEFUNCTION, 172 TokenType.SIMPLEAGGREGATEFUNCTION, 173 } 174 175 TYPE_TOKENS = { 176 TokenType.BIT, 177 TokenType.BOOLEAN, 178 TokenType.TINYINT, 179 TokenType.UTINYINT, 180 TokenType.SMALLINT, 181 TokenType.USMALLINT, 182 TokenType.INT, 183 TokenType.UINT, 184 TokenType.BIGINT, 185 TokenType.UBIGINT, 186 TokenType.INT128, 187 TokenType.UINT128, 188 TokenType.INT256, 189 TokenType.UINT256, 190 TokenType.MEDIUMINT, 191 TokenType.UMEDIUMINT, 192 TokenType.FIXEDSTRING, 193 TokenType.FLOAT, 194 TokenType.DOUBLE, 195 TokenType.CHAR, 196 TokenType.NCHAR, 197 TokenType.VARCHAR, 198 TokenType.NVARCHAR, 199 TokenType.BPCHAR, 200 TokenType.TEXT, 201 TokenType.MEDIUMTEXT, 202 TokenType.LONGTEXT, 203 TokenType.MEDIUMBLOB, 204 TokenType.LONGBLOB, 205 TokenType.BINARY, 206 TokenType.VARBINARY, 207 TokenType.JSON, 208 TokenType.JSONB, 209 TokenType.INTERVAL, 210 TokenType.TINYBLOB, 211 TokenType.TINYTEXT, 212 TokenType.TIME, 213 TokenType.TIMETZ, 214 TokenType.TIMESTAMP, 215 TokenType.TIMESTAMP_S, 216 TokenType.TIMESTAMP_MS, 217 TokenType.TIMESTAMP_NS, 218 TokenType.TIMESTAMPTZ, 219 TokenType.TIMESTAMPLTZ, 220 TokenType.DATETIME, 221 TokenType.DATETIME64, 222 TokenType.DATE, 223 TokenType.DATE32, 224 TokenType.INT4RANGE, 225 TokenType.INT4MULTIRANGE, 226 TokenType.INT8RANGE, 227 TokenType.INT8MULTIRANGE, 228 TokenType.NUMRANGE, 229 TokenType.NUMMULTIRANGE, 230 TokenType.TSRANGE, 231 TokenType.TSMULTIRANGE, 232 TokenType.TSTZRANGE, 233 TokenType.TSTZMULTIRANGE, 234 TokenType.DATERANGE, 235 TokenType.DATEMULTIRANGE, 236 TokenType.DECIMAL, 237 TokenType.UDECIMAL, 238 TokenType.BIGDECIMAL, 239 TokenType.UUID, 240 TokenType.GEOGRAPHY, 241 TokenType.GEOMETRY, 
242 TokenType.HLLSKETCH, 243 TokenType.HSTORE, 244 TokenType.PSEUDO_TYPE, 245 TokenType.SUPER, 246 TokenType.SERIAL, 247 TokenType.SMALLSERIAL, 248 TokenType.BIGSERIAL, 249 TokenType.XML, 250 TokenType.YEAR, 251 TokenType.UNIQUEIDENTIFIER, 252 TokenType.USERDEFINED, 253 TokenType.MONEY, 254 TokenType.SMALLMONEY, 255 TokenType.ROWVERSION, 256 TokenType.IMAGE, 257 TokenType.VARIANT, 258 TokenType.OBJECT, 259 TokenType.OBJECT_IDENTIFIER, 260 TokenType.INET, 261 TokenType.IPADDRESS, 262 TokenType.IPPREFIX, 263 TokenType.IPV4, 264 TokenType.IPV6, 265 TokenType.UNKNOWN, 266 TokenType.NULL, 267 TokenType.NAME, 268 *ENUM_TYPE_TOKENS, 269 *NESTED_TYPE_TOKENS, 270 *AGGREGATE_TYPE_TOKENS, 271 } 272 273 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 274 TokenType.BIGINT: TokenType.UBIGINT, 275 TokenType.INT: TokenType.UINT, 276 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 277 TokenType.SMALLINT: TokenType.USMALLINT, 278 TokenType.TINYINT: TokenType.UTINYINT, 279 TokenType.DECIMAL: TokenType.UDECIMAL, 280 } 281 282 SUBQUERY_PREDICATES = { 283 TokenType.ANY: exp.Any, 284 TokenType.ALL: exp.All, 285 TokenType.EXISTS: exp.Exists, 286 TokenType.SOME: exp.Any, 287 } 288 289 RESERVED_TOKENS = { 290 *Tokenizer.SINGLE_TOKENS.values(), 291 TokenType.SELECT, 292 } 293 294 DB_CREATABLES = { 295 TokenType.DATABASE, 296 TokenType.SCHEMA, 297 TokenType.TABLE, 298 TokenType.VIEW, 299 TokenType.MODEL, 300 TokenType.DICTIONARY, 301 TokenType.SEQUENCE, 302 TokenType.STORAGE_INTEGRATION, 303 } 304 305 CREATABLES = { 306 TokenType.COLUMN, 307 TokenType.CONSTRAINT, 308 TokenType.FUNCTION, 309 TokenType.INDEX, 310 TokenType.PROCEDURE, 311 TokenType.FOREIGN_KEY, 312 *DB_CREATABLES, 313 } 314 315 # Tokens that can represent identifiers 316 ID_VAR_TOKENS = { 317 TokenType.VAR, 318 TokenType.ANTI, 319 TokenType.APPLY, 320 TokenType.ASC, 321 TokenType.ASOF, 322 TokenType.AUTO_INCREMENT, 323 TokenType.BEGIN, 324 TokenType.BPCHAR, 325 TokenType.CACHE, 326 TokenType.CASE, 327 TokenType.COLLATE, 328 TokenType.COMMAND, 329 
TokenType.COMMENT, 330 TokenType.COMMIT, 331 TokenType.CONSTRAINT, 332 TokenType.DEFAULT, 333 TokenType.DELETE, 334 TokenType.DESC, 335 TokenType.DESCRIBE, 336 TokenType.DICTIONARY, 337 TokenType.DIV, 338 TokenType.END, 339 TokenType.EXECUTE, 340 TokenType.ESCAPE, 341 TokenType.FALSE, 342 TokenType.FIRST, 343 TokenType.FILTER, 344 TokenType.FINAL, 345 TokenType.FORMAT, 346 TokenType.FULL, 347 TokenType.IDENTIFIER, 348 TokenType.IS, 349 TokenType.ISNULL, 350 TokenType.INTERVAL, 351 TokenType.KEEP, 352 TokenType.KILL, 353 TokenType.LEFT, 354 TokenType.LOAD, 355 TokenType.MERGE, 356 TokenType.NATURAL, 357 TokenType.NEXT, 358 TokenType.OFFSET, 359 TokenType.OPERATOR, 360 TokenType.ORDINALITY, 361 TokenType.OVERLAPS, 362 TokenType.OVERWRITE, 363 TokenType.PARTITION, 364 TokenType.PERCENT, 365 TokenType.PIVOT, 366 TokenType.PRAGMA, 367 TokenType.RANGE, 368 TokenType.RECURSIVE, 369 TokenType.REFERENCES, 370 TokenType.REFRESH, 371 TokenType.REPLACE, 372 TokenType.RIGHT, 373 TokenType.ROW, 374 TokenType.ROWS, 375 TokenType.SEMI, 376 TokenType.SET, 377 TokenType.SETTINGS, 378 TokenType.SHOW, 379 TokenType.TEMPORARY, 380 TokenType.TOP, 381 TokenType.TRUE, 382 TokenType.TRUNCATE, 383 TokenType.UNIQUE, 384 TokenType.UNPIVOT, 385 TokenType.UPDATE, 386 TokenType.USE, 387 TokenType.VOLATILE, 388 TokenType.WINDOW, 389 *CREATABLES, 390 *SUBQUERY_PREDICATES, 391 *TYPE_TOKENS, 392 *NO_PAREN_FUNCTIONS, 393 } 394 395 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 396 397 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 398 TokenType.ANTI, 399 TokenType.APPLY, 400 TokenType.ASOF, 401 TokenType.FULL, 402 TokenType.LEFT, 403 TokenType.LOCK, 404 TokenType.NATURAL, 405 TokenType.OFFSET, 406 TokenType.RIGHT, 407 TokenType.SEMI, 408 TokenType.WINDOW, 409 } 410 411 ALIAS_TOKENS = ID_VAR_TOKENS 412 413 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 414 415 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 416 417 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 418 419 FUNC_TOKENS 
= { 420 TokenType.COLLATE, 421 TokenType.COMMAND, 422 TokenType.CURRENT_DATE, 423 TokenType.CURRENT_DATETIME, 424 TokenType.CURRENT_TIMESTAMP, 425 TokenType.CURRENT_TIME, 426 TokenType.CURRENT_USER, 427 TokenType.FILTER, 428 TokenType.FIRST, 429 TokenType.FORMAT, 430 TokenType.GLOB, 431 TokenType.IDENTIFIER, 432 TokenType.INDEX, 433 TokenType.ISNULL, 434 TokenType.ILIKE, 435 TokenType.INSERT, 436 TokenType.LIKE, 437 TokenType.MERGE, 438 TokenType.OFFSET, 439 TokenType.PRIMARY_KEY, 440 TokenType.RANGE, 441 TokenType.REPLACE, 442 TokenType.RLIKE, 443 TokenType.ROW, 444 TokenType.UNNEST, 445 TokenType.VAR, 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.SEQUENCE, 449 TokenType.DATE, 450 TokenType.DATETIME, 451 TokenType.TABLE, 452 TokenType.TIMESTAMP, 453 TokenType.TIMESTAMPTZ, 454 TokenType.TRUNCATE, 455 TokenType.WINDOW, 456 TokenType.XOR, 457 *TYPE_TOKENS, 458 *SUBQUERY_PREDICATES, 459 } 460 461 CONJUNCTION = { 462 TokenType.AND: exp.And, 463 TokenType.OR: exp.Or, 464 } 465 466 EQUALITY = { 467 TokenType.COLON_EQ: exp.PropertyEQ, 468 TokenType.EQ: exp.EQ, 469 TokenType.NEQ: exp.NEQ, 470 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 471 } 472 473 COMPARISON = { 474 TokenType.GT: exp.GT, 475 TokenType.GTE: exp.GTE, 476 TokenType.LT: exp.LT, 477 TokenType.LTE: exp.LTE, 478 } 479 480 BITWISE = { 481 TokenType.AMP: exp.BitwiseAnd, 482 TokenType.CARET: exp.BitwiseXor, 483 TokenType.PIPE: exp.BitwiseOr, 484 } 485 486 TERM = { 487 TokenType.DASH: exp.Sub, 488 TokenType.PLUS: exp.Add, 489 TokenType.MOD: exp.Mod, 490 TokenType.COLLATE: exp.Collate, 491 } 492 493 FACTOR = { 494 TokenType.DIV: exp.IntDiv, 495 TokenType.LR_ARROW: exp.Distance, 496 TokenType.SLASH: exp.Div, 497 TokenType.STAR: exp.Mul, 498 } 499 500 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 501 502 TIMES = { 503 TokenType.TIME, 504 TokenType.TIMETZ, 505 } 506 507 TIMESTAMPS = { 508 TokenType.TIMESTAMP, 509 TokenType.TIMESTAMPTZ, 510 TokenType.TIMESTAMPLTZ, 511 *TIMES, 512 } 513 514 SET_OPERATIONS 
= { 515 TokenType.UNION, 516 TokenType.INTERSECT, 517 TokenType.EXCEPT, 518 } 519 520 JOIN_METHODS = { 521 TokenType.ASOF, 522 TokenType.NATURAL, 523 TokenType.POSITIONAL, 524 } 525 526 JOIN_SIDES = { 527 TokenType.LEFT, 528 TokenType.RIGHT, 529 TokenType.FULL, 530 } 531 532 JOIN_KINDS = { 533 TokenType.INNER, 534 TokenType.OUTER, 535 TokenType.CROSS, 536 TokenType.SEMI, 537 TokenType.ANTI, 538 } 539 540 JOIN_HINTS: t.Set[str] = set() 541 542 LAMBDAS = { 543 TokenType.ARROW: lambda self, expressions: self.expression( 544 exp.Lambda, 545 this=self._replace_lambda( 546 self._parse_conjunction(), 547 {node.name for node in expressions}, 548 ), 549 expressions=expressions, 550 ), 551 TokenType.FARROW: lambda self, expressions: self.expression( 552 exp.Kwarg, 553 this=exp.var(expressions[0].name), 554 expression=self._parse_conjunction(), 555 ), 556 } 557 558 COLUMN_OPERATORS = { 559 TokenType.DOT: None, 560 TokenType.DCOLON: lambda self, this, to: self.expression( 561 exp.Cast if self.STRICT_CAST else exp.TryCast, 562 this=this, 563 to=to, 564 ), 565 TokenType.ARROW: lambda self, this, path: self.expression( 566 exp.JSONExtract, 567 this=this, 568 expression=self.dialect.to_json_path(path), 569 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 570 ), 571 TokenType.DARROW: lambda self, this, path: self.expression( 572 exp.JSONExtractScalar, 573 this=this, 574 expression=self.dialect.to_json_path(path), 575 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 576 ), 577 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtract, 579 this=this, 580 expression=path, 581 ), 582 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 583 exp.JSONBExtractScalar, 584 this=this, 585 expression=path, 586 ), 587 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 588 exp.JSONBContains, 589 this=this, 590 expression=key, 591 ), 592 } 593 594 EXPRESSION_PARSERS = { 595 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, 
TokenType.CLUSTER_BY), 596 exp.Column: lambda self: self._parse_column(), 597 exp.Condition: lambda self: self._parse_conjunction(), 598 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 599 exp.Expression: lambda self: self._parse_expression(), 600 exp.From: lambda self: self._parse_from(), 601 exp.Group: lambda self: self._parse_group(), 602 exp.Having: lambda self: self._parse_having(), 603 exp.Identifier: lambda self: self._parse_id_var(), 604 exp.Join: lambda self: self._parse_join(), 605 exp.Lambda: lambda self: self._parse_lambda(), 606 exp.Lateral: lambda self: self._parse_lateral(), 607 exp.Limit: lambda self: self._parse_limit(), 608 exp.Offset: lambda self: self._parse_offset(), 609 exp.Order: lambda self: self._parse_order(), 610 exp.Ordered: lambda self: self._parse_ordered(), 611 exp.Properties: lambda self: self._parse_properties(), 612 exp.Qualify: lambda self: self._parse_qualify(), 613 exp.Returning: lambda self: self._parse_returning(), 614 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 615 exp.Table: lambda self: self._parse_table_parts(), 616 exp.TableAlias: lambda self: self._parse_table_alias(), 617 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 618 exp.Where: lambda self: self._parse_where(), 619 exp.Window: lambda self: self._parse_named_window(), 620 exp.With: lambda self: self._parse_with(), 621 "JOIN_TYPE": lambda self: self._parse_join_parts(), 622 } 623 624 STATEMENT_PARSERS = { 625 TokenType.ALTER: lambda self: self._parse_alter(), 626 TokenType.BEGIN: lambda self: self._parse_transaction(), 627 TokenType.CACHE: lambda self: self._parse_cache(), 628 TokenType.COMMENT: lambda self: self._parse_comment(), 629 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 630 TokenType.CREATE: lambda self: self._parse_create(), 631 TokenType.DELETE: lambda self: self._parse_delete(), 632 TokenType.DESC: lambda self: self._parse_describe(), 633 TokenType.DESCRIBE: lambda self: 
self._parse_describe(), 634 TokenType.DROP: lambda self: self._parse_drop(), 635 TokenType.INSERT: lambda self: self._parse_insert(), 636 TokenType.KILL: lambda self: self._parse_kill(), 637 TokenType.LOAD: lambda self: self._parse_load(), 638 TokenType.MERGE: lambda self: self._parse_merge(), 639 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 640 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 641 TokenType.REFRESH: lambda self: self._parse_refresh(), 642 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 643 TokenType.SET: lambda self: self._parse_set(), 644 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 645 TokenType.UNCACHE: lambda self: self._parse_uncache(), 646 TokenType.UPDATE: lambda self: self._parse_update(), 647 TokenType.USE: lambda self: self.expression( 648 exp.Use, 649 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 650 this=self._parse_table(schema=False), 651 ), 652 } 653 654 UNARY_PARSERS = { 655 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 656 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 657 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 658 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 659 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 660 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 661 } 662 663 STRING_PARSERS = { 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.STRING: lambda self, token: self.expression( 672 exp.Literal, 
this=token.text, is_string=True 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 } 680 681 NUMERIC_PARSERS = { 682 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 683 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 684 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 685 TokenType.NUMBER: lambda self, token: self.expression( 686 exp.Literal, this=token.text, is_string=False 687 ), 688 } 689 690 PRIMARY_PARSERS = { 691 **STRING_PARSERS, 692 **NUMERIC_PARSERS, 693 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 694 TokenType.NULL: lambda self, _: self.expression(exp.Null), 695 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 696 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 697 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 698 TokenType.STAR: lambda self, _: self.expression( 699 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 700 ), 701 } 702 703 PLACEHOLDER_PARSERS = { 704 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 705 TokenType.PARAMETER: lambda self: self._parse_parameter(), 706 TokenType.COLON: lambda self: ( 707 self.expression(exp.Placeholder, this=self._prev.text) 708 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 709 else None 710 ), 711 } 712 713 RANGE_PARSERS = { 714 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 715 TokenType.GLOB: binary_range_parser(exp.Glob), 716 TokenType.ILIKE: binary_range_parser(exp.ILike), 717 TokenType.IN: lambda self, this: self._parse_in(this), 718 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 719 TokenType.IS: lambda self, this: 
self._parse_is(this), 720 TokenType.LIKE: binary_range_parser(exp.Like), 721 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 722 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 723 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 724 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 725 } 726 727 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 728 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 729 "AUTO": lambda self: self._parse_auto_property(), 730 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 731 "BACKUP": lambda self: self.expression( 732 exp.BackupProperty, this=self._parse_var(any_token=True) 733 ), 734 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 735 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHECKSUM": lambda self: self._parse_checksum(), 738 "CLUSTER BY": lambda self: self._parse_cluster(), 739 "CLUSTERED": lambda self: self._parse_clustered_by(), 740 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 741 exp.CollateProperty, **kwargs 742 ), 743 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 744 "CONTAINS": lambda self: self._parse_contains_property(), 745 "COPY": lambda self: self._parse_copy_property(), 746 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 747 "DEFINER": lambda self: self._parse_definer(), 748 "DETERMINISTIC": lambda self: self.expression( 749 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 750 ), 751 "DISTKEY": lambda self: self._parse_distkey(), 752 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 753 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 754 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 755 
"EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 756 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 757 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 758 "FREESPACE": lambda self: self._parse_freespace(), 759 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 760 "HEAP": lambda self: self.expression(exp.HeapProperty), 761 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 762 "IMMUTABLE": lambda self: self.expression( 763 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 764 ), 765 "INHERITS": lambda self: self.expression( 766 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 767 ), 768 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 769 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 770 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 771 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 772 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 773 "LIKE": lambda self: self._parse_create_like(), 774 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 775 "LOCK": lambda self: self._parse_locking(), 776 "LOCKING": lambda self: self._parse_locking(), 777 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 778 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 779 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 780 "MODIFIES": lambda self: self._parse_modifies_property(), 781 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 782 "NO": lambda self: self._parse_no_property(), 783 "ON": lambda self: self._parse_on_property(), 784 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 785 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 786 "PARTITION": lambda self: 
self._parse_partitioned_of(), 787 "PARTITION BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 790 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 791 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 792 "READS": lambda self: self._parse_reads_property(), 793 "REMOTE": lambda self: self._parse_remote_with_connection(), 794 "RETURNS": lambda self: self._parse_returns(), 795 "ROW": lambda self: self._parse_row(), 796 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 797 "SAMPLE": lambda self: self.expression( 798 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 799 ), 800 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 801 "SETTINGS": lambda self: self.expression( 802 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 803 ), 804 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 805 "SORTKEY": lambda self: self._parse_sortkey(), 806 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 807 "STABLE": lambda self: self.expression( 808 exp.StabilityProperty, this=exp.Literal.string("STABLE") 809 ), 810 "STORED": lambda self: self._parse_stored(), 811 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 812 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 813 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 814 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 815 "TO": lambda self: self._parse_to_table(), 816 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 817 "TRANSFORM": lambda self: self.expression( 818 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 819 ), 820 "TTL": lambda self: self._parse_ttl(), 821 "USING": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 822 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 823 "VOLATILE": lambda self: self._parse_volatile_property(), 824 "WITH": lambda self: self._parse_with_property(), 825 } 826 827 CONSTRAINT_PARSERS = { 828 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 829 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 830 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 831 "CHARACTER SET": lambda self: self.expression( 832 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 833 ), 834 "CHECK": lambda self: self.expression( 835 exp.CheckColumnConstraint, 836 this=self._parse_wrapped(self._parse_conjunction), 837 enforced=self._match_text_seq("ENFORCED"), 838 ), 839 "COLLATE": lambda self: self.expression( 840 exp.CollateColumnConstraint, this=self._parse_var() 841 ), 842 "COMMENT": lambda self: self.expression( 843 exp.CommentColumnConstraint, this=self._parse_string() 844 ), 845 "COMPRESS": lambda self: self._parse_compress(), 846 "CLUSTERED": lambda self: self.expression( 847 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 848 ), 849 "NONCLUSTERED": lambda self: self.expression( 850 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 851 ), 852 "DEFAULT": lambda self: self.expression( 853 exp.DefaultColumnConstraint, this=self._parse_bitwise() 854 ), 855 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 856 "EPHEMERAL": lambda self: self.expression( 857 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 858 ), 859 "EXCLUDE": lambda self: self.expression( 860 exp.ExcludeColumnConstraint, this=self._parse_index_params() 861 ), 862 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 863 "FORMAT": lambda self: self.expression( 864 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 865 ), 866 
"GENERATED": lambda self: self._parse_generated_as_identity(), 867 "IDENTITY": lambda self: self._parse_auto_increment(), 868 "INLINE": lambda self: self._parse_inline(), 869 "LIKE": lambda self: self._parse_create_like(), 870 "NOT": lambda self: self._parse_not_constraint(), 871 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 872 "ON": lambda self: ( 873 self._match(TokenType.UPDATE) 874 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 875 ) 876 or self.expression(exp.OnProperty, this=self._parse_id_var()), 877 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 878 "PERIOD": lambda self: self._parse_period_for_system_time(), 879 "PRIMARY KEY": lambda self: self._parse_primary_key(), 880 "REFERENCES": lambda self: self._parse_references(match=False), 881 "TITLE": lambda self: self.expression( 882 exp.TitleColumnConstraint, this=self._parse_var_or_string() 883 ), 884 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 885 "UNIQUE": lambda self: self._parse_unique(), 886 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 887 "WITH": lambda self: self.expression( 888 exp.Properties, expressions=self._parse_wrapped_properties() 889 ), 890 } 891 892 ALTER_PARSERS = { 893 "ADD": lambda self: self._parse_alter_table_add(), 894 "ALTER": lambda self: self._parse_alter_table_alter(), 895 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 896 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 897 "DROP": lambda self: self._parse_alter_table_drop(), 898 "RENAME": lambda self: self._parse_alter_table_rename(), 899 } 900 901 SCHEMA_UNNAMED_CONSTRAINTS = { 902 "CHECK", 903 "EXCLUDE", 904 "FOREIGN KEY", 905 "LIKE", 906 "PERIOD", 907 "PRIMARY KEY", 908 "UNIQUE", 909 } 910 911 NO_PAREN_FUNCTION_PARSERS = { 912 "ANY": lambda self: self.expression(exp.Any, 
this=self._parse_bitwise()), 913 "CASE": lambda self: self._parse_case(), 914 "IF": lambda self: self._parse_if(), 915 "NEXT": lambda self: self._parse_next_value_for(), 916 } 917 918 INVALID_FUNC_NAME_TOKENS = { 919 TokenType.IDENTIFIER, 920 TokenType.STRING, 921 } 922 923 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 924 925 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 926 927 FUNCTION_PARSERS = { 928 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 929 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 930 "DECODE": lambda self: self._parse_decode(), 931 "EXTRACT": lambda self: self._parse_extract(), 932 "JSON_OBJECT": lambda self: self._parse_json_object(), 933 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 934 "JSON_TABLE": lambda self: self._parse_json_table(), 935 "MATCH": lambda self: self._parse_match_against(), 936 "OPENJSON": lambda self: self._parse_open_json(), 937 "POSITION": lambda self: self._parse_position(), 938 "PREDICT": lambda self: self._parse_predict(), 939 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 940 "STRING_AGG": lambda self: self._parse_string_agg(), 941 "SUBSTRING": lambda self: self._parse_substring(), 942 "TRIM": lambda self: self._parse_trim(), 943 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 944 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 945 } 946 947 QUERY_MODIFIER_PARSERS = { 948 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 949 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 950 TokenType.WHERE: lambda self: ("where", self._parse_where()), 951 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 952 TokenType.HAVING: lambda self: ("having", self._parse_having()), 953 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 954 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 955 TokenType.ORDER_BY: lambda 
self: ("order", self._parse_order()), 956 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 957 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 958 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 959 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 960 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 961 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 962 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 963 TokenType.CLUSTER_BY: lambda self: ( 964 "cluster", 965 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 966 ), 967 TokenType.DISTRIBUTE_BY: lambda self: ( 968 "distribute", 969 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 970 ), 971 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 972 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 973 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 974 } 975 976 SET_PARSERS = { 977 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 978 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 979 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 980 "TRANSACTION": lambda self: self._parse_set_transaction(), 981 } 982 983 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 984 985 TYPE_LITERAL_PARSERS = { 986 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 987 } 988 989 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 990 991 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 992 993 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 994 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 995 "ISOLATION": ( 996 ("LEVEL", "REPEATABLE", "READ"), 997 ("LEVEL", "READ", "COMMITTED"), 998 ("LEVEL", "READ", "UNCOMITTED"), 999 ("LEVEL", 
"SERIALIZABLE"), 1000 ), 1001 "READ": ("WRITE", "ONLY"), 1002 } 1003 1004 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1005 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1006 ) 1007 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1008 1009 CREATE_SEQUENCE: OPTIONS_TYPE = { 1010 "SCALE": ("EXTEND", "NOEXTEND"), 1011 "SHARD": ("EXTEND", "NOEXTEND"), 1012 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1013 **dict.fromkeys( 1014 ( 1015 "SESSION", 1016 "GLOBAL", 1017 "KEEP", 1018 "NOKEEP", 1019 "ORDER", 1020 "NOORDER", 1021 "NOCACHE", 1022 "CYCLE", 1023 "NOCYCLE", 1024 "NOMINVALUE", 1025 "NOMAXVALUE", 1026 "NOSCALE", 1027 "NOSHARD", 1028 ), 1029 tuple(), 1030 ), 1031 } 1032 1033 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1034 1035 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1036 1037 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1038 1039 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1040 1041 CLONE_KEYWORDS = {"CLONE", "COPY"} 1042 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1043 1044 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1045 1046 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1047 1048 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1049 1050 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1051 1052 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1053 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1054 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1055 1056 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1057 1058 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1059 1060 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1061 1062 DISTINCT_TOKENS = {TokenType.DISTINCT} 1063 1064 
    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable parser state so the instance can parse new input."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the tokens can't be parsed into any of the given types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed; surface all collected failures at once.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on semicolons into per-statement chunks, then runs
        # `parse_method` once per chunk. A trailing semicolon does not open a new chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed by the parser.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
1247 if self.error_level == ErrorLevel.WARN: 1248 for error in self.errors: 1249 logger.error(str(error)) 1250 elif self.error_level == ErrorLevel.RAISE and self.errors: 1251 raise ParseError( 1252 concat_messages(self.errors, self.max_errors), 1253 errors=merge_errors(self.errors), 1254 ) 1255 1256 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1257 """ 1258 Appends an error in the list of recorded errors or raises it, depending on the chosen 1259 error level setting. 1260 """ 1261 token = token or self._curr or self._prev or Token.string("") 1262 start = token.start 1263 end = token.end + 1 1264 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1265 highlight = self.sql[start:end] 1266 end_context = self.sql[end : end + self.error_message_context] 1267 1268 error = ParseError.new( 1269 f"{message}. Line {token.line}, Col: {token.col}.\n" 1270 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1271 description=message, 1272 line=token.line, 1273 col=token.col, 1274 start_context=start_context, 1275 highlight=highlight, 1276 end_context=end_context, 1277 ) 1278 1279 if self.error_level == ErrorLevel.IMMEDIATE: 1280 raise error 1281 1282 self.errors.append(error) 1283 1284 def expression( 1285 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1286 ) -> E: 1287 """ 1288 Creates a new, validated Expression. 1289 1290 Args: 1291 exp_class: The expression class to instantiate. 1292 comments: An optional list of comments to attach to the expression. 1293 kwargs: The arguments to set for the expression along with their respective values. 1294 1295 Returns: 1296 The target expression. 
1297 """ 1298 instance = exp_class(**kwargs) 1299 instance.add_comments(comments) if comments else self._add_comments(instance) 1300 return self.validate_expression(instance) 1301 1302 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1303 if expression and self._prev_comments: 1304 expression.add_comments(self._prev_comments) 1305 self._prev_comments = None 1306 1307 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1308 """ 1309 Validates an Expression, making sure that all its mandatory arguments are set. 1310 1311 Args: 1312 expression: The expression to validate. 1313 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1314 1315 Returns: 1316 The validated expression. 1317 """ 1318 if self.error_level != ErrorLevel.IGNORE: 1319 for error_message in expression.error_messages(args): 1320 self.raise_error(error_message) 1321 1322 return expression 1323 1324 def _find_sql(self, start: Token, end: Token) -> str: 1325 return self.sql[start.start : end.end + 1] 1326 1327 def _is_connected(self) -> bool: 1328 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1329 1330 def _advance(self, times: int = 1) -> None: 1331 self._index += times 1332 self._curr = seq_get(self._tokens, self._index) 1333 self._next = seq_get(self._tokens, self._index + 1) 1334 1335 if self._index > 0: 1336 self._prev = self._tokens[self._index - 1] 1337 self._prev_comments = self._prev.comments 1338 else: 1339 self._prev = None 1340 self._prev_comments = None 1341 1342 def _retreat(self, index: int) -> None: 1343 if index != self._index: 1344 self._advance(index - self._index) 1345 1346 def _warn_unsupported(self) -> None: 1347 if len(self._tokens) <= 1: 1348 return 1349 1350 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1351 # interested in emitting a warning for the one being currently processed. 
1352 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1353 1354 logger.warning( 1355 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1356 ) 1357 1358 def _parse_command(self) -> exp.Command: 1359 self._warn_unsupported() 1360 return self.expression( 1361 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1362 ) 1363 1364 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1365 """ 1366 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1367 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1368 the parser state accordingly 1369 """ 1370 index = self._index 1371 error_level = self.error_level 1372 1373 self.error_level = ErrorLevel.IMMEDIATE 1374 try: 1375 this = parse_method() 1376 except ParseError: 1377 this = None 1378 finally: 1379 if not this or retreat: 1380 self._retreat(index) 1381 self.error_level = error_level 1382 1383 return this 1384 1385 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1386 start = self._prev 1387 exists = self._parse_exists() if allow_exists else None 1388 1389 self._match(TokenType.ON) 1390 1391 materialized = self._match_text_seq("MATERIALIZED") 1392 kind = self._match_set(self.CREATABLES) and self._prev 1393 if not kind: 1394 return self._parse_as_command(start) 1395 1396 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1397 this = self._parse_user_defined_function(kind=kind.token_type) 1398 elif kind.token_type == TokenType.TABLE: 1399 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1400 elif kind.token_type == TokenType.COLUMN: 1401 this = self._parse_column() 1402 else: 1403 this = self._parse_id_var() 1404 1405 self._match(TokenType.IS) 1406 1407 return self.expression( 1408 exp.Comment, 1409 this=this, 1410 
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by one action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Dispatch on the current token: registered statement parsers first, then raw
        # commands, otherwise fall back to an expression / SELECT with query modifiers.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> [...options].
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: degrade to an opaque command.
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence was consumed.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token and treat it as a function.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Any tokens left over at this point mean unsupported syntax -> opaque command.
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Consumes CREATE SEQUENCE options; returns None if nothing was consumed.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Tries registered property parsers first, then special cases, then a generic
        # `key = value` assignment, finally sequence options.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        # STORED AS <format> | STORED AS INPUTFORMAT '...' OUTPUTFORMAT '...'
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Generic `<keyword> [=|AS] <field>` property.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties until one fails to parse; None if none found.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is a table property; elsewhere it
        # denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        # SYSTEM_VERSIONING [= ON] [(HISTORY_TABLE = t [, DATA_CONSISTENCY_CHECK = x])]
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF | DEFAULT
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Only COPY GRANTS qualifies; otherwise give the COPY token back.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]; backtracks fully
        # when the ISOLATED LOADING keywords are absent.
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
2014 lock_type = "ACCESS" 2015 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2016 lock_type = "EXCLUSIVE" 2017 elif self._match_text_seq("SHARE"): 2018 lock_type = "SHARE" 2019 elif self._match_text_seq("READ"): 2020 lock_type = "READ" 2021 elif self._match_text_seq("WRITE"): 2022 lock_type = "WRITE" 2023 elif self._match_text_seq("CHECKSUM"): 2024 lock_type = "CHECKSUM" 2025 else: 2026 lock_type = None 2027 2028 override = self._match_text_seq("OVERRIDE") 2029 2030 return self.expression( 2031 exp.LockingProperty, 2032 this=this, 2033 kind=kind, 2034 for_or_in=for_or_in, 2035 lock_type=lock_type, 2036 override=override, 2037 ) 2038 2039 def _parse_partition_by(self) -> t.List[exp.Expression]: 2040 if self._match(TokenType.PARTITION_BY): 2041 return self._parse_csv(self._parse_conjunction) 2042 return [] 2043 2044 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2045 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2046 if self._match_text_seq("MINVALUE"): 2047 return exp.var("MINVALUE") 2048 if self._match_text_seq("MAXVALUE"): 2049 return exp.var("MAXVALUE") 2050 return self._parse_bitwise() 2051 2052 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2053 expression = None 2054 from_expressions = None 2055 to_expressions = None 2056 2057 if self._match(TokenType.IN): 2058 this = self._parse_wrapped_csv(self._parse_bitwise) 2059 elif self._match(TokenType.FROM): 2060 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2061 self._match_text_seq("TO") 2062 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2063 elif self._match_text_seq("WITH", "(", "MODULUS"): 2064 this = self._parse_number() 2065 self._match_text_seq(",", "REMAINDER") 2066 expression = self._parse_number() 2067 self._match_r_paren() 2068 else: 2069 self.raise_error("Failed to parse partition bound spec.") 2070 2071 return self.expression( 2072 exp.PartitionBoundSpec, 2073 this=this, 2074 expression=expression, 
2075 from_expressions=from_expressions, 2076 to_expressions=to_expressions, 2077 ) 2078 2079 # https://www.postgresql.org/docs/current/sql-createtable.html 2080 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2081 if not self._match_text_seq("OF"): 2082 self._retreat(self._index - 1) 2083 return None 2084 2085 this = self._parse_table(schema=True) 2086 2087 if self._match(TokenType.DEFAULT): 2088 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2089 elif self._match_text_seq("FOR", "VALUES"): 2090 expression = self._parse_partition_bound_spec() 2091 else: 2092 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2093 2094 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2095 2096 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2097 self._match(TokenType.EQ) 2098 return self.expression( 2099 exp.PartitionedByProperty, 2100 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2101 ) 2102 2103 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2104 if self._match_text_seq("AND", "STATISTICS"): 2105 statistics = True 2106 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2107 statistics = False 2108 else: 2109 statistics = None 2110 2111 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2112 2113 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2114 if self._match_text_seq("SQL"): 2115 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2116 return None 2117 2118 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2119 if self._match_text_seq("SQL", "DATA"): 2120 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2121 return None 2122 2123 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2124 if self._match_text_seq("PRIMARY", "INDEX"): 2125 return exp.NoPrimaryIndexProperty() 2126 if 
self._match_text_seq("SQL"): 2127 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2128 return None 2129 2130 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2131 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2132 return exp.OnCommitProperty() 2133 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2134 return exp.OnCommitProperty(delete=True) 2135 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2136 2137 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2138 if self._match_text_seq("SQL", "DATA"): 2139 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2140 return None 2141 2142 def _parse_distkey(self) -> exp.DistKeyProperty: 2143 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2144 2145 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2146 table = self._parse_table(schema=True) 2147 2148 options = [] 2149 while self._match_texts(("INCLUDING", "EXCLUDING")): 2150 this = self._prev.text.upper() 2151 2152 id_var = self._parse_id_var() 2153 if not id_var: 2154 return None 2155 2156 options.append( 2157 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2158 ) 2159 2160 return self.expression(exp.LikeProperty, this=table, expressions=options) 2161 2162 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2163 return self.expression( 2164 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2165 ) 2166 2167 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2168 self._match(TokenType.EQ) 2169 return self.expression( 2170 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2171 ) 2172 2173 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2174 self._match_text_seq("WITH", "CONNECTION") 2175 return self.expression( 2176 
exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2177 ) 2178 2179 def _parse_returns(self) -> exp.ReturnsProperty: 2180 value: t.Optional[exp.Expression] 2181 is_table = self._match(TokenType.TABLE) 2182 2183 if is_table: 2184 if self._match(TokenType.LT): 2185 value = self.expression( 2186 exp.Schema, 2187 this="TABLE", 2188 expressions=self._parse_csv(self._parse_struct_types), 2189 ) 2190 if not self._match(TokenType.GT): 2191 self.raise_error("Expecting >") 2192 else: 2193 value = self._parse_schema(exp.var("TABLE")) 2194 else: 2195 value = self._parse_types() 2196 2197 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2198 2199 def _parse_describe(self) -> exp.Describe: 2200 kind = self._match_set(self.CREATABLES) and self._prev.text 2201 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2202 if not self._match_set(self.ID_VAR_TOKENS, advance=False): 2203 style = None 2204 self._retreat(self._index - 1) 2205 this = self._parse_table(schema=True) 2206 properties = self._parse_properties() 2207 expressions = properties.expressions if properties else None 2208 return self.expression( 2209 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2210 ) 2211 2212 def _parse_insert(self) -> exp.Insert: 2213 comments = ensure_list(self._prev_comments) 2214 hint = self._parse_hint() 2215 overwrite = self._match(TokenType.OVERWRITE) 2216 ignore = self._match(TokenType.IGNORE) 2217 local = self._match_text_seq("LOCAL") 2218 alternative = None 2219 is_function = None 2220 2221 if self._match_text_seq("DIRECTORY"): 2222 this: t.Optional[exp.Expression] = self.expression( 2223 exp.Directory, 2224 this=self._parse_var_or_string(), 2225 local=local, 2226 row_format=self._parse_row_format(match_row=True), 2227 ) 2228 else: 2229 if self._match(TokenType.OR): 2230 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2231 2232 
self._match(TokenType.INTO) 2233 comments += ensure_list(self._prev_comments) 2234 self._match(TokenType.TABLE) 2235 is_function = self._match(TokenType.FUNCTION) 2236 2237 this = self._parse_table(schema=True) if not is_function else self._parse_function() 2238 2239 returning = self._parse_returning() 2240 2241 return self.expression( 2242 exp.Insert, 2243 comments=comments, 2244 hint=hint, 2245 is_function=is_function, 2246 this=this, 2247 by_name=self._match_text_seq("BY", "NAME"), 2248 exists=self._parse_exists(), 2249 partition=self._parse_partition(), 2250 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2251 and self._parse_conjunction(), 2252 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2253 conflict=self._parse_on_conflict(), 2254 returning=returning or self._parse_returning(), 2255 overwrite=overwrite, 2256 alternative=alternative, 2257 ignore=ignore, 2258 ) 2259 2260 def _parse_kill(self) -> exp.Kill: 2261 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2262 2263 return self.expression( 2264 exp.Kill, 2265 this=self._parse_primary(), 2266 kind=kind, 2267 ) 2268 2269 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2270 conflict = self._match_text_seq("ON", "CONFLICT") 2271 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2272 2273 if not conflict and not duplicate: 2274 return None 2275 2276 conflict_keys = None 2277 constraint = None 2278 2279 if conflict: 2280 if self._match_text_seq("ON", "CONSTRAINT"): 2281 constraint = self._parse_id_var() 2282 elif self._match(TokenType.L_PAREN): 2283 conflict_keys = self._parse_csv(self._parse_id_var) 2284 self._match_r_paren() 2285 2286 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2287 if self._prev.token_type == TokenType.UPDATE: 2288 self._match(TokenType.SET) 2289 expressions = self._parse_csv(self._parse_equality) 2290 else: 2291 expressions = None 2292 2293 return self.expression( 2294 
exp.OnConflict, 2295 duplicate=duplicate, 2296 expressions=expressions, 2297 action=action, 2298 conflict_keys=conflict_keys, 2299 constraint=constraint, 2300 ) 2301 2302 def _parse_returning(self) -> t.Optional[exp.Returning]: 2303 if not self._match(TokenType.RETURNING): 2304 return None 2305 return self.expression( 2306 exp.Returning, 2307 expressions=self._parse_csv(self._parse_expression), 2308 into=self._match(TokenType.INTO) and self._parse_table_part(), 2309 ) 2310 2311 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2312 if not self._match(TokenType.FORMAT): 2313 return None 2314 return self._parse_row_format() 2315 2316 def _parse_row_format( 2317 self, match_row: bool = False 2318 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2319 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2320 return None 2321 2322 if self._match_text_seq("SERDE"): 2323 this = self._parse_string() 2324 2325 serde_properties = None 2326 if self._match(TokenType.SERDE_PROPERTIES): 2327 serde_properties = self.expression( 2328 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2329 ) 2330 2331 return self.expression( 2332 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2333 ) 2334 2335 self._match_text_seq("DELIMITED") 2336 2337 kwargs = {} 2338 2339 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2340 kwargs["fields"] = self._parse_string() 2341 if self._match_text_seq("ESCAPED", "BY"): 2342 kwargs["escaped"] = self._parse_string() 2343 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2344 kwargs["collection_items"] = self._parse_string() 2345 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2346 kwargs["map_keys"] = self._parse_string() 2347 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2348 kwargs["lines"] = self._parse_string() 2349 if self._match_text_seq("NULL", "DEFINED", "AS"): 2350 
kwargs["null"] = self._parse_string() 2351 2352 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2353 2354 def _parse_load(self) -> exp.LoadData | exp.Command: 2355 if self._match_text_seq("DATA"): 2356 local = self._match_text_seq("LOCAL") 2357 self._match_text_seq("INPATH") 2358 inpath = self._parse_string() 2359 overwrite = self._match(TokenType.OVERWRITE) 2360 self._match_pair(TokenType.INTO, TokenType.TABLE) 2361 2362 return self.expression( 2363 exp.LoadData, 2364 this=self._parse_table(schema=True), 2365 local=local, 2366 overwrite=overwrite, 2367 inpath=inpath, 2368 partition=self._parse_partition(), 2369 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2370 serde=self._match_text_seq("SERDE") and self._parse_string(), 2371 ) 2372 return self._parse_as_command(self._prev) 2373 2374 def _parse_delete(self) -> exp.Delete: 2375 # This handles MySQL's "Multiple-Table Syntax" 2376 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2377 tables = None 2378 comments = self._prev_comments 2379 if not self._match(TokenType.FROM, advance=False): 2380 tables = self._parse_csv(self._parse_table) or None 2381 2382 returning = self._parse_returning() 2383 2384 return self.expression( 2385 exp.Delete, 2386 comments=comments, 2387 tables=tables, 2388 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2389 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2390 where=self._parse_where(), 2391 returning=returning or self._parse_returning(), 2392 limit=self._parse_limit(), 2393 ) 2394 2395 def _parse_update(self) -> exp.Update: 2396 comments = self._prev_comments 2397 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2398 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2399 returning = self._parse_returning() 2400 return self.expression( 2401 exp.Update, 2402 comments=comments, 2403 **{ # type: ignore 2404 "this": this, 
2405 "expressions": expressions, 2406 "from": self._parse_from(joins=True), 2407 "where": self._parse_where(), 2408 "returning": returning or self._parse_returning(), 2409 "order": self._parse_order(), 2410 "limit": self._parse_limit(), 2411 }, 2412 ) 2413 2414 def _parse_uncache(self) -> exp.Uncache: 2415 if not self._match(TokenType.TABLE): 2416 self.raise_error("Expecting TABLE after UNCACHE") 2417 2418 return self.expression( 2419 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2420 ) 2421 2422 def _parse_cache(self) -> exp.Cache: 2423 lazy = self._match_text_seq("LAZY") 2424 self._match(TokenType.TABLE) 2425 table = self._parse_table(schema=True) 2426 2427 options = [] 2428 if self._match_text_seq("OPTIONS"): 2429 self._match_l_paren() 2430 k = self._parse_string() 2431 self._match(TokenType.EQ) 2432 v = self._parse_string() 2433 options = [k, v] 2434 self._match_r_paren() 2435 2436 self._match(TokenType.ALIAS) 2437 return self.expression( 2438 exp.Cache, 2439 this=table, 2440 lazy=lazy, 2441 options=options, 2442 expression=self._parse_select(nested=True), 2443 ) 2444 2445 def _parse_partition(self) -> t.Optional[exp.Partition]: 2446 if not self._match(TokenType.PARTITION): 2447 return None 2448 2449 return self.expression( 2450 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2451 ) 2452 2453 def _parse_value(self) -> exp.Tuple: 2454 if self._match(TokenType.L_PAREN): 2455 expressions = self._parse_csv(self._parse_expression) 2456 self._match_r_paren() 2457 return self.expression(exp.Tuple, expressions=expressions) 2458 2459 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
2460 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2461 2462 def _parse_projections(self) -> t.List[exp.Expression]: 2463 return self._parse_expressions() 2464 2465 def _parse_select( 2466 self, 2467 nested: bool = False, 2468 table: bool = False, 2469 parse_subquery_alias: bool = True, 2470 parse_set_operation: bool = True, 2471 ) -> t.Optional[exp.Expression]: 2472 cte = self._parse_with() 2473 2474 if cte: 2475 this = self._parse_statement() 2476 2477 if not this: 2478 self.raise_error("Failed to parse any statement following CTE") 2479 return cte 2480 2481 if "with" in this.arg_types: 2482 this.set("with", cte) 2483 else: 2484 self.raise_error(f"{this.key} does not support CTE") 2485 this = cte 2486 2487 return this 2488 2489 # duckdb supports leading with FROM x 2490 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2491 2492 if self._match(TokenType.SELECT): 2493 comments = self._prev_comments 2494 2495 hint = self._parse_hint() 2496 all_ = self._match(TokenType.ALL) 2497 distinct = self._match_set(self.DISTINCT_TOKENS) 2498 2499 kind = ( 2500 self._match(TokenType.ALIAS) 2501 and self._match_texts(("STRUCT", "VALUE")) 2502 and self._prev.text.upper() 2503 ) 2504 2505 if distinct: 2506 distinct = self.expression( 2507 exp.Distinct, 2508 on=self._parse_value() if self._match(TokenType.ON) else None, 2509 ) 2510 2511 if all_ and distinct: 2512 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2513 2514 limit = self._parse_limit(top=True) 2515 projections = self._parse_projections() 2516 2517 this = self.expression( 2518 exp.Select, 2519 kind=kind, 2520 hint=hint, 2521 distinct=distinct, 2522 expressions=projections, 2523 limit=limit, 2524 ) 2525 this.comments = comments 2526 2527 into = self._parse_into() 2528 if into: 2529 this.set("into", into) 2530 2531 if not from_: 2532 from_ = self._parse_from() 2533 2534 if from_: 2535 this.set("from", from_) 2536 2537 this = 
self._parse_query_modifiers(this) 2538 elif (table or nested) and self._match(TokenType.L_PAREN): 2539 if self._match(TokenType.PIVOT): 2540 this = self._parse_simplified_pivot() 2541 elif self._match(TokenType.FROM): 2542 this = exp.select("*").from_( 2543 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2544 ) 2545 else: 2546 this = ( 2547 self._parse_table() 2548 if table 2549 else self._parse_select(nested=True, parse_set_operation=False) 2550 ) 2551 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2552 2553 self._match_r_paren() 2554 2555 # We return early here so that the UNION isn't attached to the subquery by the 2556 # following call to _parse_set_operations, but instead becomes the parent node 2557 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2558 elif self._match(TokenType.VALUES, advance=False): 2559 this = self._parse_derived_table_values() 2560 elif from_: 2561 this = exp.select("*").from_(from_.this, copy=False) 2562 else: 2563 this = None 2564 2565 if parse_set_operation: 2566 return self._parse_set_operations(this) 2567 return this 2568 2569 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2570 if not skip_with_token and not self._match(TokenType.WITH): 2571 return None 2572 2573 comments = self._prev_comments 2574 recursive = self._match(TokenType.RECURSIVE) 2575 2576 expressions = [] 2577 while True: 2578 expressions.append(self._parse_cte()) 2579 2580 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2581 break 2582 else: 2583 self._match(TokenType.WITH) 2584 2585 return self.expression( 2586 exp.With, comments=comments, expressions=expressions, recursive=recursive 2587 ) 2588 2589 def _parse_cte(self) -> exp.CTE: 2590 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2591 if not alias or not alias.this: 2592 self.raise_error("Expected CTE to have alias") 2593 2594 self._match(TokenType.ALIAS) 2595 2596 if self._match_text_seq("NOT", 
"MATERIALIZED"): 2597 materialized = False 2598 elif self._match_text_seq("MATERIALIZED"): 2599 materialized = True 2600 else: 2601 materialized = None 2602 2603 return self.expression( 2604 exp.CTE, 2605 this=self._parse_wrapped(self._parse_statement), 2606 alias=alias, 2607 materialized=materialized, 2608 ) 2609 2610 def _parse_table_alias( 2611 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2612 ) -> t.Optional[exp.TableAlias]: 2613 any_token = self._match(TokenType.ALIAS) 2614 alias = ( 2615 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2616 or self._parse_string_as_identifier() 2617 ) 2618 2619 index = self._index 2620 if self._match(TokenType.L_PAREN): 2621 columns = self._parse_csv(self._parse_function_parameter) 2622 self._match_r_paren() if columns else self._retreat(index) 2623 else: 2624 columns = None 2625 2626 if not alias and not columns: 2627 return None 2628 2629 return self.expression(exp.TableAlias, this=alias, columns=columns) 2630 2631 def _parse_subquery( 2632 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2633 ) -> t.Optional[exp.Subquery]: 2634 if not this: 2635 return None 2636 2637 return self.expression( 2638 exp.Subquery, 2639 this=this, 2640 pivots=self._parse_pivots(), 2641 alias=self._parse_table_alias() if parse_alias else None, 2642 ) 2643 2644 def _implicit_unnests_to_explicit(self, this: E) -> E: 2645 from sqlglot.optimizer.normalize_identifiers import ( 2646 normalize_identifiers as _norm, 2647 ) 2648 2649 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2650 for i, join in enumerate(this.args.get("joins") or []): 2651 table = join.this 2652 normalized_table = table.copy() 2653 normalized_table.meta["maybe_column"] = True 2654 normalized_table = _norm(normalized_table, dialect=self.dialect) 2655 2656 if isinstance(table, exp.Table) and not join.args.get("on"): 2657 if normalized_table.parts[0].name in refs: 2658 
table_as_column = table.to_column() 2659 unnest = exp.Unnest(expressions=[table_as_column]) 2660 2661 # Table.to_column creates a parent Alias node that we want to convert to 2662 # a TableAlias and attach to the Unnest, so it matches the parser's output 2663 if isinstance(table.args.get("alias"), exp.TableAlias): 2664 table_as_column.replace(table_as_column.this) 2665 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2666 2667 table.replace(unnest) 2668 2669 refs.add(normalized_table.alias_or_name) 2670 2671 return this 2672 2673 def _parse_query_modifiers( 2674 self, this: t.Optional[exp.Expression] 2675 ) -> t.Optional[exp.Expression]: 2676 if isinstance(this, (exp.Query, exp.Table)): 2677 for join in self._parse_joins(): 2678 this.append("joins", join) 2679 for lateral in iter(self._parse_lateral, None): 2680 this.append("laterals", lateral) 2681 2682 while True: 2683 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2684 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2685 key, expression = parser(self) 2686 2687 if expression: 2688 this.set(key, expression) 2689 if key == "limit": 2690 offset = expression.args.pop("offset", None) 2691 2692 if offset: 2693 offset = exp.Offset(expression=offset) 2694 this.set("offset", offset) 2695 2696 limit_by_expressions = expression.expressions 2697 expression.set("expressions", None) 2698 offset.set("expressions", limit_by_expressions) 2699 continue 2700 break 2701 2702 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2703 this = self._implicit_unnests_to_explicit(this) 2704 2705 return this 2706 2707 def _parse_hint(self) -> t.Optional[exp.Hint]: 2708 if self._match(TokenType.HINT): 2709 hints = [] 2710 for hint in iter( 2711 lambda: self._parse_csv( 2712 lambda: self._parse_function() or self._parse_var(upper=True) 2713 ), 2714 [], 2715 ): 2716 hints.extend(hint) 2717 2718 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2719 
self.raise_error("Expected */ after HINT") 2720 2721 return self.expression(exp.Hint, expressions=hints) 2722 2723 return None 2724 2725 def _parse_into(self) -> t.Optional[exp.Into]: 2726 if not self._match(TokenType.INTO): 2727 return None 2728 2729 temp = self._match(TokenType.TEMPORARY) 2730 unlogged = self._match_text_seq("UNLOGGED") 2731 self._match(TokenType.TABLE) 2732 2733 return self.expression( 2734 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2735 ) 2736 2737 def _parse_from( 2738 self, joins: bool = False, skip_from_token: bool = False 2739 ) -> t.Optional[exp.From]: 2740 if not skip_from_token and not self._match(TokenType.FROM): 2741 return None 2742 2743 return self.expression( 2744 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2745 ) 2746 2747 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2748 return self.expression( 2749 exp.MatchRecognizeMeasure, 2750 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2751 this=self._parse_expression(), 2752 ) 2753 2754 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2755 if not self._match(TokenType.MATCH_RECOGNIZE): 2756 return None 2757 2758 self._match_l_paren() 2759 2760 partition = self._parse_partition_by() 2761 order = self._parse_order() 2762 2763 measures = ( 2764 self._parse_csv(self._parse_match_recognize_measure) 2765 if self._match_text_seq("MEASURES") 2766 else None 2767 ) 2768 2769 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2770 rows = exp.var("ONE ROW PER MATCH") 2771 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2772 text = "ALL ROWS PER MATCH" 2773 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2774 text += " SHOW EMPTY MATCHES" 2775 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2776 text += " OMIT EMPTY MATCHES" 2777 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2778 text += " WITH UNMATCHED ROWS" 2779 
rows = exp.var(text) 2780 else: 2781 rows = None 2782 2783 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2784 text = "AFTER MATCH SKIP" 2785 if self._match_text_seq("PAST", "LAST", "ROW"): 2786 text += " PAST LAST ROW" 2787 elif self._match_text_seq("TO", "NEXT", "ROW"): 2788 text += " TO NEXT ROW" 2789 elif self._match_text_seq("TO", "FIRST"): 2790 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2791 elif self._match_text_seq("TO", "LAST"): 2792 text += f" TO LAST {self._advance_any().text}" # type: ignore 2793 after = exp.var(text) 2794 else: 2795 after = None 2796 2797 if self._match_text_seq("PATTERN"): 2798 self._match_l_paren() 2799 2800 if not self._curr: 2801 self.raise_error("Expecting )", self._curr) 2802 2803 paren = 1 2804 start = self._curr 2805 2806 while self._curr and paren > 0: 2807 if self._curr.token_type == TokenType.L_PAREN: 2808 paren += 1 2809 if self._curr.token_type == TokenType.R_PAREN: 2810 paren -= 1 2811 2812 end = self._prev 2813 self._advance() 2814 2815 if paren > 0: 2816 self.raise_error("Expecting )", self._curr) 2817 2818 pattern = exp.var(self._find_sql(start, end)) 2819 else: 2820 pattern = None 2821 2822 define = ( 2823 self._parse_csv(self._parse_name_as_expression) 2824 if self._match_text_seq("DEFINE") 2825 else None 2826 ) 2827 2828 self._match_r_paren() 2829 2830 return self.expression( 2831 exp.MatchRecognize, 2832 partition_by=partition, 2833 order=order, 2834 measures=measures, 2835 rows=rows, 2836 after=after, 2837 pattern=pattern, 2838 define=define, 2839 alias=self._parse_table_alias(), 2840 ) 2841 2842 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2843 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2844 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2845 cross_apply = False 2846 2847 if cross_apply is not None: 2848 this = self._parse_select(table=True) 2849 view = None 2850 outer = None 2851 elif self._match(TokenType.LATERAL): 2852 this = 
self._parse_select(table=True) 2853 view = self._match(TokenType.VIEW) 2854 outer = self._match(TokenType.OUTER) 2855 else: 2856 return None 2857 2858 if not this: 2859 this = ( 2860 self._parse_unnest() 2861 or self._parse_function() 2862 or self._parse_id_var(any_token=False) 2863 ) 2864 2865 while self._match(TokenType.DOT): 2866 this = exp.Dot( 2867 this=this, 2868 expression=self._parse_function() or self._parse_id_var(any_token=False), 2869 ) 2870 2871 if view: 2872 table = self._parse_id_var(any_token=False) 2873 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2874 table_alias: t.Optional[exp.TableAlias] = self.expression( 2875 exp.TableAlias, this=table, columns=columns 2876 ) 2877 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2878 # We move the alias from the lateral's child node to the lateral itself 2879 table_alias = this.args["alias"].pop() 2880 else: 2881 table_alias = self._parse_table_alias() 2882 2883 return self.expression( 2884 exp.Lateral, 2885 this=this, 2886 view=view, 2887 outer=outer, 2888 alias=table_alias, 2889 cross_apply=cross_apply, 2890 ) 2891 2892 def _parse_join_parts( 2893 self, 2894 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2895 return ( 2896 self._match_set(self.JOIN_METHODS) and self._prev, 2897 self._match_set(self.JOIN_SIDES) and self._prev, 2898 self._match_set(self.JOIN_KINDS) and self._prev, 2899 ) 2900 2901 def _parse_join( 2902 self, skip_join_token: bool = False, parse_bracket: bool = False 2903 ) -> t.Optional[exp.Join]: 2904 if self._match(TokenType.COMMA): 2905 return self.expression(exp.Join, this=self._parse_table()) 2906 2907 index = self._index 2908 method, side, kind = self._parse_join_parts() 2909 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2910 join = self._match(TokenType.JOIN) 2911 2912 if not skip_join_token and not join: 2913 self._retreat(index) 2914 kind = None 2915 method = None 2916 side = 
None 2917 2918 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2919 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2920 2921 if not skip_join_token and not join and not outer_apply and not cross_apply: 2922 return None 2923 2924 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2925 2926 if method: 2927 kwargs["method"] = method.text 2928 if side: 2929 kwargs["side"] = side.text 2930 if kind: 2931 kwargs["kind"] = kind.text 2932 if hint: 2933 kwargs["hint"] = hint 2934 2935 if self._match(TokenType.MATCH_CONDITION): 2936 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2937 2938 if self._match(TokenType.ON): 2939 kwargs["on"] = self._parse_conjunction() 2940 elif self._match(TokenType.USING): 2941 kwargs["using"] = self._parse_wrapped_id_vars() 2942 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2943 kind and kind.token_type == TokenType.CROSS 2944 ): 2945 index = self._index 2946 joins: t.Optional[list] = list(self._parse_joins()) 2947 2948 if joins and self._match(TokenType.ON): 2949 kwargs["on"] = self._parse_conjunction() 2950 elif joins and self._match(TokenType.USING): 2951 kwargs["using"] = self._parse_wrapped_id_vars() 2952 else: 2953 joins = None 2954 self._retreat(index) 2955 2956 kwargs["this"].set("joins", joins if joins else None) 2957 2958 comments = [c for token in (method, side, kind) if token for c in token.comments] 2959 return self.expression(exp.Join, comments=comments, **kwargs) 2960 2961 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2962 this = self._parse_conjunction() 2963 2964 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2965 return this 2966 2967 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2968 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2969 2970 return this 2971 2972 def _parse_index_params(self) -> exp.IndexParameters: 2973 
using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2974 2975 if self._match(TokenType.L_PAREN, advance=False): 2976 columns = self._parse_wrapped_csv(self._parse_with_operator) 2977 else: 2978 columns = None 2979 2980 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2981 partition_by = self._parse_partition_by() 2982 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2983 tablespace = ( 2984 self._parse_var(any_token=True) 2985 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2986 else None 2987 ) 2988 where = self._parse_where() 2989 2990 return self.expression( 2991 exp.IndexParameters, 2992 using=using, 2993 columns=columns, 2994 include=include, 2995 partition_by=partition_by, 2996 where=where, 2997 with_storage=with_storage, 2998 tablespace=tablespace, 2999 ) 3000 3001 def _parse_index( 3002 self, 3003 index: t.Optional[exp.Expression] = None, 3004 ) -> t.Optional[exp.Index]: 3005 if index: 3006 unique = None 3007 primary = None 3008 amp = None 3009 3010 self._match(TokenType.ON) 3011 self._match(TokenType.TABLE) # hive 3012 table = self._parse_table_parts(schema=True) 3013 else: 3014 unique = self._match(TokenType.UNIQUE) 3015 primary = self._match_text_seq("PRIMARY") 3016 amp = self._match_text_seq("AMP") 3017 3018 if not self._match(TokenType.INDEX): 3019 return None 3020 3021 index = self._parse_id_var() 3022 table = None 3023 3024 params = self._parse_index_params() 3025 3026 return self.expression( 3027 exp.Index, 3028 this=index, 3029 table=table, 3030 unique=unique, 3031 primary=primary, 3032 amp=amp, 3033 params=params, 3034 ) 3035 3036 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3037 hints: t.List[exp.Expression] = [] 3038 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3039 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3040 hints.append( 3041 self.expression( 
    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) table reference into an exp.Table.

        Handles up to catalog.db.table plus arbitrarily deep extra dot parts,
        an optional trailing ``*`` wildcard fused onto the last identifier,
        and references that name only a database (``is_db_reference``).

        Raises via ``raise_error`` when the expected table/database name is
        missing.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: previous db becomes catalog, previous
                # table becomes db, and a new table part is read.
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                # Fuse the `*` onto the identifier text itself, e.g. `tab*`
                # — presumably for wildcard table matching; confirm with
                # dialect callers.
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # Only a database was named: shift what we parsed accordingly.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
self._match_text_seq("AT"): 3176 return self.expression( 3177 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3178 ) 3179 3180 this.set("hints", self._parse_table_hints()) 3181 3182 if not this.args.get("pivots"): 3183 this.set("pivots", self._parse_pivots()) 3184 3185 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3186 table_sample = self._parse_table_sample() 3187 3188 if table_sample: 3189 table_sample.set("this", this) 3190 this = table_sample 3191 3192 if joins: 3193 for join in self._parse_joins(): 3194 this.append("joins", join) 3195 3196 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3197 this.set("ordinality", True) 3198 this.set("alias", self._parse_table_alias()) 3199 3200 return this 3201 3202 def _parse_version(self) -> t.Optional[exp.Version]: 3203 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3204 this = "TIMESTAMP" 3205 elif self._match(TokenType.VERSION_SNAPSHOT): 3206 this = "VERSION" 3207 else: 3208 return None 3209 3210 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3211 kind = self._prev.text.upper() 3212 start = self._parse_bitwise() 3213 self._match_texts(("TO", "AND")) 3214 end = self._parse_bitwise() 3215 expression: t.Optional[exp.Expression] = self.expression( 3216 exp.Tuple, expressions=[start, end] 3217 ) 3218 elif self._match_text_seq("CONTAINED", "IN"): 3219 kind = "CONTAINED IN" 3220 expression = self.expression( 3221 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3222 ) 3223 elif self._match(TokenType.ALL): 3224 kind = "ALL" 3225 expression = None 3226 else: 3227 self._match_text_seq("AS", "OF") 3228 kind = "AS OF" 3229 expression = self._parse_type() 3230 3231 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3232 3233 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3234 if not self._match(TokenType.UNNEST): 3235 return None 3236 3237 expressions = self._parse_wrapped_csv(self._parse_equality) 3238 offset 
= self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3239 3240 alias = self._parse_table_alias() if with_alias else None 3241 3242 if alias: 3243 if self.dialect.UNNEST_COLUMN_ONLY: 3244 if alias.args.get("columns"): 3245 self.raise_error("Unexpected extra column alias in unnest.") 3246 3247 alias.set("columns", [alias.this]) 3248 alias.set("this", None) 3249 3250 columns = alias.args.get("columns") or [] 3251 if offset and len(expressions) < len(columns): 3252 offset = columns.pop() 3253 3254 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3255 self._match(TokenType.ALIAS) 3256 offset = self._parse_id_var( 3257 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3258 ) or exp.to_identifier("offset") 3259 3260 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3261 3262 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3263 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3264 if not is_derived and not self._match_text_seq("VALUES"): 3265 return None 3266 3267 expressions = self._parse_csv(self._parse_value) 3268 alias = self._parse_table_alias() 3269 3270 if is_derived: 3271 self._match_r_paren() 3272 3273 return self.expression( 3274 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3275 ) 3276 3277 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3278 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3279 as_modifier and self._match_text_seq("USING", "SAMPLE") 3280 ): 3281 return None 3282 3283 bucket_numerator = None 3284 bucket_denominator = None 3285 bucket_field = None 3286 percent = None 3287 size = None 3288 seed = None 3289 3290 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3291 matched_l_paren = self._match(TokenType.L_PAREN) 3292 3293 if self.TABLESAMPLE_CSV: 3294 num = None 3295 expressions = self._parse_csv(self._parse_primary) 3296 else: 3297 expressions = None 3298 
num = ( 3299 self._parse_factor() 3300 if self._match(TokenType.NUMBER, advance=False) 3301 else self._parse_primary() or self._parse_placeholder() 3302 ) 3303 3304 if self._match_text_seq("BUCKET"): 3305 bucket_numerator = self._parse_number() 3306 self._match_text_seq("OUT", "OF") 3307 bucket_denominator = bucket_denominator = self._parse_number() 3308 self._match(TokenType.ON) 3309 bucket_field = self._parse_field() 3310 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3311 percent = num 3312 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3313 size = num 3314 else: 3315 percent = num 3316 3317 if matched_l_paren: 3318 self._match_r_paren() 3319 3320 if self._match(TokenType.L_PAREN): 3321 method = self._parse_var(upper=True) 3322 seed = self._match(TokenType.COMMA) and self._parse_number() 3323 self._match_r_paren() 3324 elif self._match_texts(("SEED", "REPEATABLE")): 3325 seed = self._parse_wrapped(self._parse_number) 3326 3327 return self.expression( 3328 exp.TableSample, 3329 expressions=expressions, 3330 method=method, 3331 bucket_numerator=bucket_numerator, 3332 bucket_denominator=bucket_denominator, 3333 bucket_field=bucket_field, 3334 percent=percent, 3335 size=size, 3336 seed=seed, 3337 ) 3338 3339 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3340 return list(iter(self._parse_pivot, None)) or None 3341 3342 def _parse_joins(self) -> t.Iterator[exp.Join]: 3343 return iter(self._parse_join, None) 3344 3345 # https://duckdb.org/docs/sql/statements/pivot 3346 def _parse_simplified_pivot(self) -> exp.Pivot: 3347 def _parse_on() -> t.Optional[exp.Expression]: 3348 this = self._parse_bitwise() 3349 return self._parse_in(this) if self._match(TokenType.IN) else this 3350 3351 this = self._parse_table() 3352 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3353 using = self._match(TokenType.USING) and self._parse_csv( 3354 lambda: self._parse_alias(self._parse_function()) 3355 ) 3356 
group = self._parse_group() 3357 return self.expression( 3358 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3359 ) 3360 3361 def _parse_pivot_in(self) -> exp.In: 3362 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3363 this = self._parse_conjunction() 3364 3365 self._match(TokenType.ALIAS) 3366 alias = self._parse_field() 3367 if alias: 3368 return self.expression(exp.PivotAlias, this=this, alias=alias) 3369 3370 return this 3371 3372 value = self._parse_column() 3373 3374 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3375 self.raise_error("Expecting IN (") 3376 3377 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3378 3379 self._match_r_paren() 3380 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3381 3382 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3383 index = self._index 3384 include_nulls = None 3385 3386 if self._match(TokenType.PIVOT): 3387 unpivot = False 3388 elif self._match(TokenType.UNPIVOT): 3389 unpivot = True 3390 3391 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3392 if self._match_text_seq("INCLUDE", "NULLS"): 3393 include_nulls = True 3394 elif self._match_text_seq("EXCLUDE", "NULLS"): 3395 include_nulls = False 3396 else: 3397 return None 3398 3399 expressions = [] 3400 3401 if not self._match(TokenType.L_PAREN): 3402 self._retreat(index) 3403 return None 3404 3405 if unpivot: 3406 expressions = self._parse_csv(self._parse_column) 3407 else: 3408 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3409 3410 if not expressions: 3411 self.raise_error("Failed to parse PIVOT's aggregation list") 3412 3413 if not self._match(TokenType.FOR): 3414 self.raise_error("Expecting FOR") 3415 3416 field = self._parse_pivot_in() 3417 3418 self._match_r_paren() 3419 3420 pivot = self.expression( 3421 exp.Pivot, 3422 expressions=expressions, 3423 field=field, 3424 
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause into an exp.Group, or return None.

        Accumulates plain expressions, GROUPING SETS, ROLLUP/CUBE (both the
        `WITH ROLLUP` modifier form and the function-call form) and
        ClickHouse's WITH TOTALS, looping until no further grouping construct
        is found.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Collected kwargs for exp.Group; list-valued keys accumulate across
        # loop iterations, while "all"/"totals" are plain booleans.
        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            # `WITH ROLLUP`/`WITH CUBE` store True; `ROLLUP(...)`/`CUBE(...)`
            # store the parsed column list.
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # A WITH that wasn't followed by ROLLUP/CUBE/TOTALS
                    # belongs to a later clause — give the token back.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore
    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term: expression, ASC/DESC, NULLS FIRST/LAST and
        an optional ClickHouse WITH FILL modifier.

        Args:
            parse_method: alternative parser for the ordered expression;
                defaults to ``_parse_conjunction``.
        """
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # `(asc and False)` makes desc an explicit False (rather than None)
        # when ASC was matched, so the AST records the stated direction.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # No explicit NULLS ordering: infer it from the dialect's default
        # NULL_ORDERING ("nulls_are_small" sorts NULLs first ascending).
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
offset=offset, 3663 comments=comments, 3664 expressions=self._parse_limit_by(), 3665 ) 3666 3667 return limit_exp 3668 3669 if self._match(TokenType.FETCH): 3670 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3671 direction = self._prev.text.upper() if direction else "FIRST" 3672 3673 count = self._parse_field(tokens=self.FETCH_TOKENS) 3674 percent = self._match(TokenType.PERCENT) 3675 3676 self._match_set((TokenType.ROW, TokenType.ROWS)) 3677 3678 only = self._match_text_seq("ONLY") 3679 with_ties = self._match_text_seq("WITH", "TIES") 3680 3681 if only and with_ties: 3682 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3683 3684 return self.expression( 3685 exp.Fetch, 3686 direction=direction, 3687 count=count, 3688 percent=percent, 3689 with_ties=with_ties, 3690 ) 3691 3692 return this 3693 3694 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3695 if not self._match(TokenType.OFFSET): 3696 return this 3697 3698 count = self._parse_term() 3699 self._match_set((TokenType.ROW, TokenType.ROWS)) 3700 3701 return self.expression( 3702 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3703 ) 3704 3705 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3706 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3707 3708 def _parse_locks(self) -> t.List[exp.Lock]: 3709 locks = [] 3710 while True: 3711 if self._match_text_seq("FOR", "UPDATE"): 3712 update = True 3713 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3714 "LOCK", "IN", "SHARE", "MODE" 3715 ): 3716 update = False 3717 else: 3718 break 3719 3720 expressions = None 3721 if self._match_text_seq("OF"): 3722 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3723 3724 wait: t.Optional[bool | exp.Expression] = None 3725 if self._match_text_seq("NOWAIT"): 3726 wait = True 3727 elif self._match_text_seq("WAIT"): 3728 wait = 
self._parse_primary() 3729 elif self._match_text_seq("SKIP", "LOCKED"): 3730 wait = False 3731 3732 locks.append( 3733 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3734 ) 3735 3736 return locks 3737 3738 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3739 while this and self._match_set(self.SET_OPERATIONS): 3740 token_type = self._prev.token_type 3741 3742 if token_type == TokenType.UNION: 3743 operation = exp.Union 3744 elif token_type == TokenType.EXCEPT: 3745 operation = exp.Except 3746 else: 3747 operation = exp.Intersect 3748 3749 comments = self._prev.comments 3750 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3751 by_name = self._match_text_seq("BY", "NAME") 3752 expression = self._parse_select(nested=True, parse_set_operation=False) 3753 3754 this = self.expression( 3755 operation, 3756 comments=comments, 3757 this=this, 3758 distinct=distinct, 3759 by_name=by_name, 3760 expression=expression, 3761 ) 3762 3763 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3764 expression = this.expression 3765 3766 if expression: 3767 for arg in self.UNION_MODIFIERS: 3768 expr = expression.args.get(arg) 3769 if expr: 3770 this.set(arg, expr.pop()) 3771 3772 return this 3773 3774 def _parse_expression(self) -> t.Optional[exp.Expression]: 3775 return self._parse_alias(self._parse_conjunction()) 3776 3777 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3778 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3779 3780 def _parse_equality(self) -> t.Optional[exp.Expression]: 3781 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3782 3783 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3784 return self._parse_tokens(self._parse_range, self.COMPARISON) 3785 3786 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3787 this = this or 
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate for *this*.

        Supports `IN UNNEST(...)`, a parenthesized/bracketed list or subquery,
        and the bare-field form (e.g. Hive's `x IN tbl`).

        Args:
            this: the already-parsed left-hand operand.
            alias: forwarded to ``_parse_select_or_expression`` for list items.
        """
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            # Remember which opener we saw so we require the matching closer.
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # A single query becomes `IN (<subquery>)`.
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
is not actually a unit, it's something else (e.g. a "window side") 3897 unit = None 3898 self._retreat(self._index - 1) 3899 3900 this = exp.Literal.string(parts[0]) 3901 unit = self.expression(exp.Var, this=parts[1].upper()) 3902 3903 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3904 unit = self.expression( 3905 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3906 ) 3907 3908 return self.expression(exp.Interval, this=this, unit=unit) 3909 3910 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3911 this = self._parse_term() 3912 3913 while True: 3914 if self._match_set(self.BITWISE): 3915 this = self.expression( 3916 self.BITWISE[self._prev.token_type], 3917 this=this, 3918 expression=self._parse_term(), 3919 ) 3920 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3921 this = self.expression( 3922 exp.DPipe, 3923 this=this, 3924 expression=self._parse_term(), 3925 safe=not self.dialect.STRICT_STRING_CONCAT, 3926 ) 3927 elif self._match(TokenType.DQMARK): 3928 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3929 elif self._match_pair(TokenType.LT, TokenType.LT): 3930 this = self.expression( 3931 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3932 ) 3933 elif self._match_pair(TokenType.GT, TokenType.GT): 3934 this = self.expression( 3935 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3936 ) 3937 else: 3938 break 3939 3940 return this 3941 3942 def _parse_term(self) -> t.Optional[exp.Expression]: 3943 return self._parse_tokens(self._parse_factor, self.TERM) 3944 3945 def _parse_factor(self) -> t.Optional[exp.Expression]: 3946 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3947 this = parse_method() 3948 3949 while self._match_set(self.FACTOR): 3950 this = self.expression( 3951 self.FACTOR[self._prev.token_type], 3952 this=this, 3953 comments=self._prev_comments, 3954 expression=parse_method(), 3955 
    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse a type-level expression: an interval (possibly a sum of
        intervals), a cast-like `<type> <literal>` form, or a plain column.

        Args:
            parse_interval: when False, interval parsing is skipped entirely.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    # Not another interval segment — undo any consumed PLUS.
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `<type> '<literal>'`: dialect-specific literal parser if
                # registered, otherwise a plain cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no following literal: reparse as column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, backtracking and returning None if the tokens don't form one.

        Args:
            check_func: when True, a parenthesized type is only accepted as a type if it
                is NOT followed by a string (otherwise it could be a function call).
            schema: propagated into nested type parsing (column-def / struct contexts).
            allow_identifiers: when True, a plain identifier may be re-tokenized and
                interpreted as a type name (or a user-defined type).
        """
        index = self._index

        # Optional SYSUDTLIB. prefix — NOTE(review): presumably Teradata's UDT schema; verify.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier text: a quoted identifier may actually
                # spell a known type (e.g. "INT"), yielding exactly one type token.
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Collect a dotted UDT name, e.g. schema.my_type
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized type arguments: struct fields, nested types, enum values,
        # aggregate state types, or plain size parameters.
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. AggregateFunction(sum, Int64): first arg is a function or
                # identifier, the rest are types.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form could still be a function call — resolved below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracketed nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                # e.g. INTERVAL DAY TO SECOND
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # Disambiguate `TYPE(...)` from a function call: only a following string
            # literal confirms the type reading (e.g. DATE(3) 'str' patterns).
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one struct field: `name [:] type ...` (a column def), or a bare type."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        # If no constraints/kind were added, `this` came back unchanged — with
        # type_required, reparse the whole field as a plain type instead.
        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an `AT TIME ZONE <expr>` clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators/brackets."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a single field and normalize a bare Identifier into a Column node."""
        this = self._parse_field()
        # VALUES can be a regular identifier in dialects where a VALUES clause must
        # be followed by parens — accept it as a name when no L_PAREN follows.
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing column operators (::, dots, brackets, dialect COLUMN_OPERATORS) onto `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                # Rewrite the accumulated column path into a Dot chain so the
                # function call attaches to a plain dotted name.
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one slot: the previous name parts become
                # table/db/catalog of the new rightmost field.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, parenthesized expression/query, or tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> Concat.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        # Leading-dot number literal, e.g. `.25` -> 0.25
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC-style `{fn ...}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call proper, dispatching to registered parsers/builders.

        Args:
            functions: override mapping of function name -> builder (defaults to self.FUNCTIONS).
            anonymous: when True, always build an exp.Anonymous node instead of a typed one.
            optional_parens: when False, functions that may appear without parens are rejected.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # No-paren constructs with a dedicated parser, e.g. dialect keywords.
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No-paren functions like CURRENT_DATE.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(SELECT ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders may optionally accept the dialect as a keyword arg.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                # Preserve the original spelling when function names aren't normalized.
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                # Keys must be bare identifiers, not column references.
                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function signature (name + optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and, if present, its parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); fall back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`x -> ...` / `(x, y) -> ...`), DISTINCT list, or a plain expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached to `this`."""
        index = self._index

        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single field definition (name plus optional type/constraints)."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type, computed-column clause and constraints following a column name."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # Computed column: `name AS expr` or `name ALIAS/MATERIALIZED expr`.
        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; backtrack if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        # GENERATED ... AS ROW {START | END} [HIDDEN]
        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): the parens held a computed expression.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric args: IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC, or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed schema constraints are handled separately."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in `constraints` (or CONSTRAINT_PARSERS)."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index_type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event (e.g. DELETE/UPDATE) is taken verbatim from the next token.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table (with schema) plus constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); backtrack if SYSTEM_TIME doesn't follow."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint or, with a column list, a table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one bracketed element: an expression with an optional alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a [..] / {..} suffix: subscript, array literal, or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: shift indices by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Brackets can chain, e.g. x[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:<expr>` slice suffix."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # `ELSE interval END` may have been parsed as INTERVAL 'END' — undo that
            # by treating the operand as a column named "interval".
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or the keyword form IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # A statement-initial bare IF is treated as an opaque command in some dialects.
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; backtrack if not matched."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr) — also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        Args:
            strict: build exp.Cast when True, exp.TryCast otherwise.
            safe: carried through to the resulting node's `safe` arg.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-arg form: CAST(expr, 'type string').
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT to a temporal type becomes StrToDate/StrToTime,
                # with the format translated via the dialect's mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4986 ) 4987 4988 def _parse_string_agg(self) -> exp.Expression: 4989 if self._match(TokenType.DISTINCT): 4990 args: t.List[t.Optional[exp.Expression]] = [ 4991 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4992 ] 4993 if self._match(TokenType.COMMA): 4994 args.extend(self._parse_csv(self._parse_conjunction)) 4995 else: 4996 args = self._parse_csv(self._parse_conjunction) # type: ignore 4997 4998 index = self._index 4999 if not self._match(TokenType.R_PAREN) and args: 5000 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5001 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5002 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5003 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5004 5005 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5006 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5007 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
5008 if not self._match_text_seq("WITHIN", "GROUP"): 5009 self._retreat(index) 5010 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5011 5012 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5013 order = self._parse_order(this=seq_get(args, 0)) 5014 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5015 5016 def _parse_convert( 5017 self, strict: bool, safe: t.Optional[bool] = None 5018 ) -> t.Optional[exp.Expression]: 5019 this = self._parse_bitwise() 5020 5021 if self._match(TokenType.USING): 5022 to: t.Optional[exp.Expression] = self.expression( 5023 exp.CharacterSet, this=self._parse_var() 5024 ) 5025 elif self._match(TokenType.COMMA): 5026 to = self._parse_types() 5027 else: 5028 to = None 5029 5030 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5031 5032 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5033 """ 5034 There are generally two variants of the DECODE function: 5035 5036 - DECODE(bin, charset) 5037 - DECODE(expression, search, result [, search, result] ... [, default]) 5038 5039 The second variant will always be parsed into a CASE expression. Note that NULL 5040 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5041 instead of relying on pattern matching. 
5042 """ 5043 args = self._parse_csv(self._parse_conjunction) 5044 5045 if len(args) < 3: 5046 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5047 5048 expression, *expressions = args 5049 if not expression: 5050 return None 5051 5052 ifs = [] 5053 for search, result in zip(expressions[::2], expressions[1::2]): 5054 if not search or not result: 5055 return None 5056 5057 if isinstance(search, exp.Literal): 5058 ifs.append( 5059 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5060 ) 5061 elif isinstance(search, exp.Null): 5062 ifs.append( 5063 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5064 ) 5065 else: 5066 cond = exp.or_( 5067 exp.EQ(this=expression.copy(), expression=search), 5068 exp.and_( 5069 exp.Is(this=expression.copy(), expression=exp.Null()), 5070 exp.Is(this=search.copy(), expression=exp.Null()), 5071 copy=False, 5072 ), 5073 copy=False, 5074 ) 5075 ifs.append(exp.If(this=cond, true=result)) 5076 5077 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5078 5079 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5080 self._match_text_seq("KEY") 5081 key = self._parse_column() 5082 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5083 self._match_text_seq("VALUE") 5084 value = self._parse_bitwise() 5085 5086 if not key and not value: 5087 return None 5088 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5089 5090 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5091 if not this or not self._match_text_seq("FORMAT", "JSON"): 5092 return this 5093 5094 return self.expression(exp.FormatJson, this=this) 5095 5096 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5097 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 5098 for value in values: 5099 if self._match_text_seq(value, "ON", on): 5100 return f"{value} ON {on}" 5101 5102 return None 5103 5104 @t.overload 5105 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5106 5107 @t.overload 5108 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5109 5110 def _parse_json_object(self, agg=False): 5111 star = self._parse_star() 5112 expressions = ( 5113 [star] 5114 if star 5115 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5116 ) 5117 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5118 5119 unique_keys = None 5120 if self._match_text_seq("WITH", "UNIQUE"): 5121 unique_keys = True 5122 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5123 unique_keys = False 5124 5125 self._match_text_seq("KEYS") 5126 5127 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5128 self._parse_type() 5129 ) 5130 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5131 5132 return self.expression( 5133 exp.JSONObjectAgg if agg else exp.JSONObject, 5134 expressions=expressions, 5135 null_handling=null_handling, 5136 unique_keys=unique_keys, 5137 return_type=return_type, 5138 encoding=encoding, 5139 ) 5140 5141 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5142 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5143 if not self._match_text_seq("NESTED"): 5144 this = self._parse_id_var() 5145 kind = self._parse_types(allow_identifiers=False) 5146 nested = None 5147 else: 5148 this = None 5149 kind = None 5150 nested = True 5151 5152 path = self._match_text_seq("PATH") and self._parse_string() 5153 nested_schema = nested and self._parse_json_schema() 5154 5155 return self.expression( 5156 exp.JSONColumnDef, 5157 this=this, 5158 kind=kind, 5159 path=path, 5160 nested_schema=nested_schema, 5161 ) 5162 5163 def _parse_json_schema(self) -> exp.JSONSchema: 
5164 self._match_text_seq("COLUMNS") 5165 return self.expression( 5166 exp.JSONSchema, 5167 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5168 ) 5169 5170 def _parse_json_table(self) -> exp.JSONTable: 5171 this = self._parse_format_json(self._parse_bitwise()) 5172 path = self._match(TokenType.COMMA) and self._parse_string() 5173 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5174 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5175 schema = self._parse_json_schema() 5176 5177 return exp.JSONTable( 5178 this=this, 5179 schema=schema, 5180 path=path, 5181 error_handling=error_handling, 5182 empty_handling=empty_handling, 5183 ) 5184 5185 def _parse_match_against(self) -> exp.MatchAgainst: 5186 expressions = self._parse_csv(self._parse_column) 5187 5188 self._match_text_seq(")", "AGAINST", "(") 5189 5190 this = self._parse_string() 5191 5192 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5193 modifier = "IN NATURAL LANGUAGE MODE" 5194 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5195 modifier = f"{modifier} WITH QUERY EXPANSION" 5196 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5197 modifier = "IN BOOLEAN MODE" 5198 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5199 modifier = "WITH QUERY EXPANSION" 5200 else: 5201 modifier = None 5202 5203 return self.expression( 5204 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5205 ) 5206 5207 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5208 def _parse_open_json(self) -> exp.OpenJSON: 5209 this = self._parse_bitwise() 5210 path = self._match(TokenType.COMMA) and self._parse_string() 5211 5212 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5213 this = self._parse_field(any_token=True) 5214 kind = self._parse_types() 5215 path = self._parse_string() 5216 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5217 5218 
return self.expression( 5219 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5220 ) 5221 5222 expressions = None 5223 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5224 self._match_l_paren() 5225 expressions = self._parse_csv(_parse_open_json_column_def) 5226 5227 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5228 5229 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5230 args = self._parse_csv(self._parse_bitwise) 5231 5232 if self._match(TokenType.IN): 5233 return self.expression( 5234 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5235 ) 5236 5237 if haystack_first: 5238 haystack = seq_get(args, 0) 5239 needle = seq_get(args, 1) 5240 else: 5241 needle = seq_get(args, 0) 5242 haystack = seq_get(args, 1) 5243 5244 return self.expression( 5245 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5246 ) 5247 5248 def _parse_predict(self) -> exp.Predict: 5249 self._match_text_seq("MODEL") 5250 this = self._parse_table() 5251 5252 self._match(TokenType.COMMA) 5253 self._match_text_seq("TABLE") 5254 5255 return self.expression( 5256 exp.Predict, 5257 this=this, 5258 expression=self._parse_table(), 5259 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5260 ) 5261 5262 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5263 args = self._parse_csv(self._parse_table) 5264 return exp.JoinHint(this=func_name.upper(), expressions=args) 5265 5266 def _parse_substring(self) -> exp.Substring: 5267 # Postgres supports the form: substring(string [from int] [for int]) 5268 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5269 5270 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5271 5272 if self._match(TokenType.FROM): 5273 args.append(self._parse_bitwise()) 5274 if self._match(TokenType.FOR): 5275 args.append(self._parse_bitwise()) 5276 5277 return 
self.validate_expression(exp.Substring.from_arg_list(args), args) 5278 5279 def _parse_trim(self) -> exp.Trim: 5280 # https://www.w3resource.com/sql/character-functions/trim.php 5281 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5282 5283 position = None 5284 collation = None 5285 expression = None 5286 5287 if self._match_texts(self.TRIM_TYPES): 5288 position = self._prev.text.upper() 5289 5290 this = self._parse_bitwise() 5291 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5292 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5293 expression = self._parse_bitwise() 5294 5295 if invert_order: 5296 this, expression = expression, this 5297 5298 if self._match(TokenType.COLLATE): 5299 collation = self._parse_bitwise() 5300 5301 return self.expression( 5302 exp.Trim, this=this, position=position, expression=expression, collation=collation 5303 ) 5304 5305 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5306 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5307 5308 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5309 return self._parse_window(self._parse_id_var(), alias=True) 5310 5311 def _parse_respect_or_ignore_nulls( 5312 self, this: t.Optional[exp.Expression] 5313 ) -> t.Optional[exp.Expression]: 5314 if self._match_text_seq("IGNORE", "NULLS"): 5315 return self.expression(exp.IgnoreNulls, this=this) 5316 if self._match_text_seq("RESPECT", "NULLS"): 5317 return self.expression(exp.RespectNulls, this=this) 5318 return this 5319 5320 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5321 if self._match(TokenType.HAVING): 5322 self._match_texts(("MAX", "MIN")) 5323 max = self._prev.text.upper() != "MIN" 5324 return self.expression( 5325 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5326 ) 5327 5328 return this 5329 5330 def _parse_window( 5331 self, this: 
t.Optional[exp.Expression], alias: bool = False 5332 ) -> t.Optional[exp.Expression]: 5333 func = this 5334 comments = func.comments if isinstance(func, exp.Expression) else None 5335 5336 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5337 self._match(TokenType.WHERE) 5338 this = self.expression( 5339 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5340 ) 5341 self._match_r_paren() 5342 5343 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5344 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5345 if self._match_text_seq("WITHIN", "GROUP"): 5346 order = self._parse_wrapped(self._parse_order) 5347 this = self.expression(exp.WithinGroup, this=this, expression=order) 5348 5349 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5350 # Some dialects choose to implement and some do not. 5351 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5352 5353 # There is some code above in _parse_lambda that handles 5354 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5355 5356 # The below changes handle 5357 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5358 5359 # Oracle allows both formats 5360 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5361 # and Snowflake chose to do the same for familiarity 5362 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5363 if isinstance(this, exp.AggFunc): 5364 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5365 5366 if ignore_respect and ignore_respect is not this: 5367 ignore_respect.replace(ignore_respect.this) 5368 this = self.expression(ignore_respect.__class__, this=this) 5369 5370 this = self._parse_respect_or_ignore_nulls(this) 5371 5372 # bigquery select from window x AS (partition by ...) 
5373 if alias: 5374 over = None 5375 self._match(TokenType.ALIAS) 5376 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5377 return this 5378 else: 5379 over = self._prev.text.upper() 5380 5381 if comments: 5382 func.comments = None # type: ignore 5383 5384 if not self._match(TokenType.L_PAREN): 5385 return self.expression( 5386 exp.Window, 5387 comments=comments, 5388 this=this, 5389 alias=self._parse_id_var(False), 5390 over=over, 5391 ) 5392 5393 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5394 5395 first = self._match(TokenType.FIRST) 5396 if self._match_text_seq("LAST"): 5397 first = False 5398 5399 partition, order = self._parse_partition_and_order() 5400 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5401 5402 if kind: 5403 self._match(TokenType.BETWEEN) 5404 start = self._parse_window_spec() 5405 self._match(TokenType.AND) 5406 end = self._parse_window_spec() 5407 5408 spec = self.expression( 5409 exp.WindowSpec, 5410 kind=kind, 5411 start=start["value"], 5412 start_side=start["side"], 5413 end=end["value"], 5414 end_side=end["side"], 5415 ) 5416 else: 5417 spec = None 5418 5419 self._match_r_paren() 5420 5421 window = self.expression( 5422 exp.Window, 5423 comments=comments, 5424 this=this, 5425 partition_by=partition, 5426 order=order, 5427 spec=spec, 5428 alias=window_alias, 5429 over=over, 5430 first=first, 5431 ) 5432 5433 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
5434 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5435 return self._parse_window(window, alias=alias) 5436 5437 return window 5438 5439 def _parse_partition_and_order( 5440 self, 5441 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5442 return self._parse_partition_by(), self._parse_order() 5443 5444 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5445 self._match(TokenType.BETWEEN) 5446 5447 return { 5448 "value": ( 5449 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5450 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5451 or self._parse_bitwise() 5452 ), 5453 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5454 } 5455 5456 def _parse_alias( 5457 self, this: t.Optional[exp.Expression], explicit: bool = False 5458 ) -> t.Optional[exp.Expression]: 5459 any_token = self._match(TokenType.ALIAS) 5460 comments = self._prev_comments 5461 5462 if explicit and not any_token: 5463 return this 5464 5465 if self._match(TokenType.L_PAREN): 5466 aliases = self.expression( 5467 exp.Aliases, 5468 comments=comments, 5469 this=this, 5470 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5471 ) 5472 self._match_r_paren(aliases) 5473 return aliases 5474 5475 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5476 self.STRING_ALIASES and self._parse_string_as_identifier() 5477 ) 5478 5479 if alias: 5480 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5481 column = this.this 5482 5483 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5484 if not this.comments and column and column.comments: 5485 this.comments = column.comments 5486 column.comments = None 5487 5488 return this 5489 5490 def _parse_id_var( 5491 self, 5492 any_token: bool = True, 5493 tokens: t.Optional[t.Collection[TokenType]] = None, 5494 ) -> t.Optional[exp.Expression]: 5495 identifier = self._parse_identifier() 5496 5497 if 
identifier: 5498 return identifier 5499 5500 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5501 quoted = self._prev.token_type == TokenType.STRING 5502 return exp.Identifier(this=self._prev.text, quoted=quoted) 5503 5504 return None 5505 5506 def _parse_string(self) -> t.Optional[exp.Expression]: 5507 if self._match_set(self.STRING_PARSERS): 5508 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5509 return self._parse_placeholder() 5510 5511 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5512 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5513 5514 def _parse_number(self) -> t.Optional[exp.Expression]: 5515 if self._match_set(self.NUMERIC_PARSERS): 5516 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5517 return self._parse_placeholder() 5518 5519 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5520 if self._match(TokenType.IDENTIFIER): 5521 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5522 return self._parse_placeholder() 5523 5524 def _parse_var( 5525 self, 5526 any_token: bool = False, 5527 tokens: t.Optional[t.Collection[TokenType]] = None, 5528 upper: bool = False, 5529 ) -> t.Optional[exp.Expression]: 5530 if ( 5531 (any_token and self._advance_any()) 5532 or self._match(TokenType.VAR) 5533 or (self._match_set(tokens) if tokens else False) 5534 ): 5535 return self.expression( 5536 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5537 ) 5538 return self._parse_placeholder() 5539 5540 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5541 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5542 self._advance() 5543 return self._prev 5544 return None 5545 5546 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5547 return self._parse_var() or self._parse_string() 5548 5549 def 
_parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5550 return self._parse_primary() or self._parse_var(any_token=True) 5551 5552 def _parse_null(self) -> t.Optional[exp.Expression]: 5553 if self._match_set(self.NULL_TOKENS): 5554 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5555 return self._parse_placeholder() 5556 5557 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5558 if self._match(TokenType.TRUE): 5559 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5560 if self._match(TokenType.FALSE): 5561 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5562 return self._parse_placeholder() 5563 5564 def _parse_star(self) -> t.Optional[exp.Expression]: 5565 if self._match(TokenType.STAR): 5566 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5567 return self._parse_placeholder() 5568 5569 def _parse_parameter(self) -> exp.Parameter: 5570 self._match(TokenType.L_BRACE) 5571 this = self._parse_identifier() or self._parse_primary_or_var() 5572 expression = self._match(TokenType.COLON) and ( 5573 self._parse_identifier() or self._parse_primary_or_var() 5574 ) 5575 self._match(TokenType.R_BRACE) 5576 return self.expression(exp.Parameter, this=this, expression=expression) 5577 5578 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5579 if self._match_set(self.PLACEHOLDER_PARSERS): 5580 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5581 if placeholder: 5582 return placeholder 5583 self._advance(-1) 5584 return None 5585 5586 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5587 if not self._match(TokenType.EXCEPT): 5588 return None 5589 if self._match(TokenType.L_PAREN, advance=False): 5590 return self._parse_wrapped_csv(self._parse_column) 5591 5592 except_column = self._parse_column() 5593 return [except_column] if except_column else None 5594 5595 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5596 if not self._match(TokenType.REPLACE): 
5597 return None 5598 if self._match(TokenType.L_PAREN, advance=False): 5599 return self._parse_wrapped_csv(self._parse_expression) 5600 5601 replace_expression = self._parse_expression() 5602 return [replace_expression] if replace_expression else None 5603 5604 def _parse_csv( 5605 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5606 ) -> t.List[exp.Expression]: 5607 parse_result = parse_method() 5608 items = [parse_result] if parse_result is not None else [] 5609 5610 while self._match(sep): 5611 self._add_comments(parse_result) 5612 parse_result = parse_method() 5613 if parse_result is not None: 5614 items.append(parse_result) 5615 5616 return items 5617 5618 def _parse_tokens( 5619 self, parse_method: t.Callable, expressions: t.Dict 5620 ) -> t.Optional[exp.Expression]: 5621 this = parse_method() 5622 5623 while self._match_set(expressions): 5624 this = self.expression( 5625 expressions[self._prev.token_type], 5626 this=this, 5627 comments=self._prev_comments, 5628 expression=parse_method(), 5629 ) 5630 5631 return this 5632 5633 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5634 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5635 5636 def _parse_wrapped_csv( 5637 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5638 ) -> t.List[exp.Expression]: 5639 return self._parse_wrapped( 5640 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5641 ) 5642 5643 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5644 wrapped = self._match(TokenType.L_PAREN) 5645 if not wrapped and not optional: 5646 self.raise_error("Expecting (") 5647 parse_result = parse_method() 5648 if wrapped: 5649 self._match_r_paren() 5650 return parse_result 5651 5652 def _parse_expressions(self) -> t.List[exp.Expression]: 5653 return self._parse_csv(self._parse_expression) 5654 5655 def _parse_select_or_expression(self, alias: bool = 
False) -> t.Optional[exp.Expression]: 5656 return self._parse_select() or self._parse_set_operations( 5657 self._parse_expression() if alias else self._parse_conjunction() 5658 ) 5659 5660 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5661 return self._parse_query_modifiers( 5662 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5663 ) 5664 5665 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5666 this = None 5667 if self._match_texts(self.TRANSACTION_KIND): 5668 this = self._prev.text 5669 5670 self._match_texts(("TRANSACTION", "WORK")) 5671 5672 modes = [] 5673 while True: 5674 mode = [] 5675 while self._match(TokenType.VAR): 5676 mode.append(self._prev.text) 5677 5678 if mode: 5679 modes.append(" ".join(mode)) 5680 if not self._match(TokenType.COMMA): 5681 break 5682 5683 return self.expression(exp.Transaction, this=this, modes=modes) 5684 5685 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5686 chain = None 5687 savepoint = None 5688 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5689 5690 self._match_texts(("TRANSACTION", "WORK")) 5691 5692 if self._match_text_seq("TO"): 5693 self._match_text_seq("SAVEPOINT") 5694 savepoint = self._parse_id_var() 5695 5696 if self._match(TokenType.AND): 5697 chain = not self._match_text_seq("NO") 5698 self._match_text_seq("CHAIN") 5699 5700 if is_rollback: 5701 return self.expression(exp.Rollback, savepoint=savepoint) 5702 5703 return self.expression(exp.Commit, chain=chain) 5704 5705 def _parse_refresh(self) -> exp.Refresh: 5706 self._match(TokenType.TABLE) 5707 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5708 5709 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5710 if not self._match_text_seq("ADD"): 5711 return None 5712 5713 self._match(TokenType.COLUMN) 5714 exists_column = self._parse_exists(not_=True) 5715 expression = self._parse_field_def() 5716 5717 if expression: 5718 
expression.set("exists", exists_column) 5719 5720 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5721 if self._match_texts(("FIRST", "AFTER")): 5722 position = self._prev.text 5723 column_position = self.expression( 5724 exp.ColumnPosition, this=self._parse_column(), position=position 5725 ) 5726 expression.set("position", column_position) 5727 5728 return expression 5729 5730 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5731 drop = self._match(TokenType.DROP) and self._parse_drop() 5732 if drop and not isinstance(drop, exp.Command): 5733 drop.set("kind", drop.args.get("kind", "COLUMN")) 5734 return drop 5735 5736 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5737 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5738 return self.expression( 5739 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5740 ) 5741 5742 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5743 index = self._index - 1 5744 5745 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5746 return self._parse_csv( 5747 lambda: self.expression( 5748 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5749 ) 5750 ) 5751 5752 self._retreat(index) 5753 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5754 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5755 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5756 5757 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5758 self._match(TokenType.COLUMN) 5759 column = self._parse_field(any_token=True) 5760 5761 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5762 return self.expression(exp.AlterColumn, this=column, drop=True) 5763 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5764 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_conjunction()) 5765 if self._match(TokenType.COMMENT): 5766 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5767 5768 self._match_text_seq("SET", "DATA") 5769 self._match_text_seq("TYPE") 5770 return self.expression( 5771 exp.AlterColumn, 5772 this=column, 5773 dtype=self._parse_types(), 5774 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5775 using=self._match(TokenType.USING) and self._parse_conjunction(), 5776 ) 5777 5778 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5779 index = self._index - 1 5780 5781 partition_exists = self._parse_exists() 5782 if self._match(TokenType.PARTITION, advance=False): 5783 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5784 5785 self._retreat(index) 5786 return self._parse_csv(self._parse_drop_column) 5787 5788 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5789 if self._match(TokenType.COLUMN): 5790 exists = self._parse_exists() 5791 old_column = self._parse_column() 5792 to = self._match_text_seq("TO") 5793 new_column = self._parse_column() 5794 5795 if old_column is None or to is None or new_column is None: 5796 return None 5797 5798 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5799 5800 self._match_text_seq("TO") 5801 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5802 5803 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5804 start = self._prev 5805 5806 if not self._match(TokenType.TABLE): 5807 return self._parse_as_command(start) 5808 5809 exists = self._parse_exists() 5810 only = self._match_text_seq("ONLY") 5811 this = self._parse_table(schema=True) 5812 5813 if self._next: 5814 self._advance() 5815 5816 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5817 if parser: 5818 actions = ensure_list(parser(self)) 5819 options = 
self._parse_csv(self._parse_property) 5820 5821 if not self._curr and actions: 5822 return self.expression( 5823 exp.AlterTable, 5824 this=this, 5825 exists=exists, 5826 actions=actions, 5827 only=only, 5828 options=options, 5829 ) 5830 5831 return self._parse_as_command(start) 5832 5833 def _parse_merge(self) -> exp.Merge: 5834 self._match(TokenType.INTO) 5835 target = self._parse_table() 5836 5837 if target and self._match(TokenType.ALIAS, advance=False): 5838 target.set("alias", self._parse_table_alias()) 5839 5840 self._match(TokenType.USING) 5841 using = self._parse_table() 5842 5843 self._match(TokenType.ON) 5844 on = self._parse_conjunction() 5845 5846 return self.expression( 5847 exp.Merge, 5848 this=target, 5849 using=using, 5850 on=on, 5851 expressions=self._parse_when_matched(), 5852 ) 5853 5854 def _parse_when_matched(self) -> t.List[exp.When]: 5855 whens = [] 5856 5857 while self._match(TokenType.WHEN): 5858 matched = not self._match(TokenType.NOT) 5859 self._match_text_seq("MATCHED") 5860 source = ( 5861 False 5862 if self._match_text_seq("BY", "TARGET") 5863 else self._match_text_seq("BY", "SOURCE") 5864 ) 5865 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5866 5867 self._match(TokenType.THEN) 5868 5869 if self._match(TokenType.INSERT): 5870 _this = self._parse_star() 5871 if _this: 5872 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5873 else: 5874 then = self.expression( 5875 exp.Insert, 5876 this=self._parse_value(), 5877 expression=self._match_text_seq("VALUES") and self._parse_value(), 5878 ) 5879 elif self._match(TokenType.UPDATE): 5880 expressions = self._parse_star() 5881 if expressions: 5882 then = self.expression(exp.Update, expressions=expressions) 5883 else: 5884 then = self.expression( 5885 exp.Update, 5886 expressions=self._match(TokenType.SET) 5887 and self._parse_csv(self._parse_equality), 5888 ) 5889 elif self._match(TokenType.DELETE): 5890 then = self.expression(exp.Var, 
this=self._prev.text) 5891 else: 5892 then = None 5893 5894 whens.append( 5895 self.expression( 5896 exp.When, 5897 matched=matched, 5898 source=source, 5899 condition=condition, 5900 then=then, 5901 ) 5902 ) 5903 return whens 5904 5905 def _parse_show(self) -> t.Optional[exp.Expression]: 5906 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5907 if parser: 5908 return parser(self) 5909 return self._parse_as_command(self._prev) 5910 5911 def _parse_set_item_assignment( 5912 self, kind: t.Optional[str] = None 5913 ) -> t.Optional[exp.Expression]: 5914 index = self._index 5915 5916 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5917 return self._parse_set_transaction(global_=kind == "GLOBAL") 5918 5919 left = self._parse_primary() or self._parse_id_var() 5920 assignment_delimiter = self._match_texts(("=", "TO")) 5921 5922 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5923 self._retreat(index) 5924 return None 5925 5926 right = self._parse_statement() or self._parse_id_var() 5927 this = self.expression(exp.EQ, this=left, expression=right) 5928 5929 return self.expression(exp.SetItem, this=this, kind=kind) 5930 5931 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5932 self._match_text_seq("TRANSACTION") 5933 characteristics = self._parse_csv( 5934 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5935 ) 5936 return self.expression( 5937 exp.SetItem, 5938 expressions=characteristics, 5939 kind="TRANSACTION", 5940 **{"global": global_}, # type: ignore 5941 ) 5942 5943 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5944 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5945 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5946 5947 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5948 index = self._index 5949 set_ = self.expression( 5950 exp.Set, 
expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5951 ) 5952 5953 if self._curr: 5954 self._retreat(index) 5955 return self._parse_as_command(self._prev) 5956 5957 return set_ 5958 5959 def _parse_var_from_options( 5960 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5961 ) -> t.Optional[exp.Var]: 5962 start = self._curr 5963 if not start: 5964 return None 5965 5966 option = start.text.upper() 5967 continuations = options.get(option) 5968 5969 index = self._index 5970 self._advance() 5971 for keywords in continuations or []: 5972 if isinstance(keywords, str): 5973 keywords = (keywords,) 5974 5975 if self._match_text_seq(*keywords): 5976 option = f"{option} {' '.join(keywords)}" 5977 break 5978 else: 5979 if continuations or continuations is None: 5980 if raise_unmatched: 5981 self.raise_error(f"Unknown option {option}") 5982 5983 self._retreat(index) 5984 return None 5985 5986 return exp.var(option) 5987 5988 def _parse_as_command(self, start: Token) -> exp.Command: 5989 while self._curr: 5990 self._advance() 5991 text = self._find_sql(start, self._prev) 5992 size = len(start.text) 5993 self._warn_unsupported() 5994 return exp.Command(this=text[:size], expression=text[size:]) 5995 5996 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5997 settings = [] 5998 5999 self._match_l_paren() 6000 kind = self._parse_id_var() 6001 6002 if self._match(TokenType.L_PAREN): 6003 while True: 6004 key = self._parse_id_var() 6005 value = self._parse_primary() 6006 6007 if not key and value is None: 6008 break 6009 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6010 self._match(TokenType.R_PAREN) 6011 6012 self._match_r_paren() 6013 6014 return self.expression( 6015 exp.DictProperty, 6016 this=this, 6017 kind=kind.this if kind else None, 6018 settings=settings, 6019 ) 6020 6021 def _parse_dict_range(self, this: str) -> exp.DictRange: 6022 self._match_l_paren() 6023 has_min = self._match_text_seq("MIN") 6024 
if has_min: 6025 min = self._parse_var() or self._parse_primary() 6026 self._match_text_seq("MAX") 6027 max = self._parse_var() or self._parse_primary() 6028 else: 6029 max = self._parse_var() or self._parse_primary() 6030 min = exp.Literal.number(0) 6031 self._match_r_paren() 6032 return self.expression(exp.DictRange, this=this, min=min, max=max) 6033 6034 def _parse_comprehension( 6035 self, this: t.Optional[exp.Expression] 6036 ) -> t.Optional[exp.Comprehension]: 6037 index = self._index 6038 expression = self._parse_column() 6039 if not self._match(TokenType.IN): 6040 self._retreat(index - 1) 6041 return None 6042 iterator = self._parse_column() 6043 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6044 return self.expression( 6045 exp.Comprehension, 6046 this=this, 6047 expression=expression, 6048 iterator=iterator, 6049 condition=condition, 6050 ) 6051 6052 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6053 if self._match(TokenType.HEREDOC_STRING): 6054 return self.expression(exp.Heredoc, this=self._prev.text) 6055 6056 if not self._match_text_seq("$"): 6057 return None 6058 6059 tags = ["$"] 6060 tag_text = None 6061 6062 if self._is_connected(): 6063 self._advance() 6064 tags.append(self._prev.text.upper()) 6065 else: 6066 self.raise_error("No closing $ found") 6067 6068 if tags[-1] != "$": 6069 if self._is_connected() and self._match_text_seq("$"): 6070 tag_text = tags[-1] 6071 tags.append("$") 6072 else: 6073 self.raise_error("No closing $ found") 6074 6075 heredoc_start = self._curr 6076 6077 while self._curr: 6078 if self._match_text_seq(*tags, advance=False): 6079 this = self._find_sql(heredoc_start, self._prev) 6080 self._advance(len(tags)) 6081 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6082 6083 self._advance() 6084 6085 self.raise_error(f"No closing {''.join(tags)} found") 6086 return None 6087 6088 def _find_parser( 6089 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6090 ) -> 
t.Optional[t.Callable]: 6091 if not self._curr: 6092 return None 6093 6094 index = self._index 6095 this = [] 6096 while True: 6097 # The current token might be multiple words 6098 curr = self._curr.text.upper() 6099 key = curr.split(" ") 6100 this.append(curr) 6101 6102 self._advance() 6103 result, trie = in_trie(trie, key) 6104 if result == TrieResult.FAILED: 6105 break 6106 6107 if result == TrieResult.EXISTS: 6108 subparser = parsers[" ".join(this)] 6109 return subparser 6110 6111 self._retreat(index) 6112 return None 6113 6114 def _match(self, token_type, advance=True, expression=None): 6115 if not self._curr: 6116 return None 6117 6118 if self._curr.token_type == token_type: 6119 if advance: 6120 self._advance() 6121 self._add_comments(expression) 6122 return True 6123 6124 return None 6125 6126 def _match_set(self, types, advance=True): 6127 if not self._curr: 6128 return None 6129 6130 if self._curr.token_type in types: 6131 if advance: 6132 self._advance() 6133 return True 6134 6135 return None 6136 6137 def _match_pair(self, token_type_a, token_type_b, advance=True): 6138 if not self._curr or not self._next: 6139 return None 6140 6141 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6142 if advance: 6143 self._advance(2) 6144 return True 6145 6146 return None 6147 6148 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6149 if not self._match(TokenType.L_PAREN, expression=expression): 6150 self.raise_error("Expecting (") 6151 6152 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6153 if not self._match(TokenType.R_PAREN, expression=expression): 6154 self.raise_error("Expecting )") 6155 6156 def _match_texts(self, texts, advance=True): 6157 if self._curr and self._curr.text.upper() in texts: 6158 if advance: 6159 self._advance() 6160 return True 6161 return None 6162 6163 def _match_text_seq(self, *texts, advance=True): 6164 index = self._index 6165 for text in 
texts: 6166 if self._curr and self._curr.text.upper() == text: 6167 self._advance() 6168 else: 6169 self._retreat(index) 6170 return None 6171 6172 if not advance: 6173 self._retreat(index) 6174 6175 return True 6176 6177 def _replace_lambda( 6178 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6179 ) -> t.Optional[exp.Expression]: 6180 if not node: 6181 return node 6182 6183 for column in node.find_all(exp.Column): 6184 if column.parts[0].name in lambda_variables: 6185 dot_or_id = column.to_dot() if column.table else column.this 6186 parent = column.parent 6187 6188 while isinstance(parent, exp.Dot): 6189 if not isinstance(parent.parent, exp.Dot): 6190 parent.replace(dot_or_id) 6191 break 6192 parent = parent.parent 6193 else: 6194 if column is node: 6195 node = dot_or_id 6196 else: 6197 column.replace(dot_or_id) 6198 return node 6199 6200 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6201 start = self._prev 6202 6203 # Not to be confused with TRUNCATE(number, decimals) function call 6204 if self._match(TokenType.L_PAREN): 6205 self._retreat(self._index - 2) 6206 return self._parse_function() 6207 6208 # Clickhouse supports TRUNCATE DATABASE as well 6209 is_database = self._match(TokenType.DATABASE) 6210 6211 self._match(TokenType.TABLE) 6212 6213 exists = self._parse_exists(not_=False) 6214 6215 expressions = self._parse_csv( 6216 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6217 ) 6218 6219 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6220 6221 if self._match_text_seq("RESTART", "IDENTITY"): 6222 identity = "RESTART" 6223 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6224 identity = "CONTINUE" 6225 else: 6226 identity = None 6227 6228 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6229 option = self._prev.text 6230 else: 6231 option = None 6232 6233 partition = self._parse_partition() 6234 6235 # Fallback case 6236 if 
self._curr: 6237 return self._parse_as_command(start) 6238 6239 return self.expression( 6240 exp.TruncateTable, 6241 expressions=expressions, 6242 is_database=is_database, 6243 exists=exists, 6244 cluster=cluster, 6245 identity=identity, 6246 option=option, 6247 partition=partition, 6248 ) 6249 6250 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6251 this = self._parse_ordered(self._parse_opclass) 6252 6253 if not self._match(TokenType.WITH): 6254 return this 6255 6256 op = self._parse_var(any_token=True) 6257 6258 return self.expression(exp.WithOperator, this=this, op=op)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from parsed VAR_MAP arguments.

    A lone star argument yields a ``StarMap``; otherwise the arguments are
    consumed as alternating key/value pairs and packed into a ``VarMap``.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []

    # Arguments alternate key, value, key, value, ...
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm expression from parsed LOG(...) arguments.

    With two arguments, the base comes first by default; dialects where
    ``LOG_BASE_FIRST`` is false reverse the order. With a single argument,
    dialects whose parser sets ``LOG_DEFAULTS_TO_LN`` produce ``Ln``.
    """
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if not value:
        # Single-argument form: LOG(x) maps to ln(x) or log(x) per dialect.
        func = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return func(this=base)

    if not dialect.LOG_BASE_FIRST:
        base, value = value, base

    return exp.Log(this=base, expression=value)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs *expr_type* from JSON-extract args.

    The first argument is the target document and the second is the JSON
    path, normalized through the dialect. Any trailing arguments are only
    attached (as ``expressions``) when building ``exp.JSONExtract``.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        doc = seq_get(args, 0)
        path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=doc, expression=path)

        # Only JSONExtract supports variadic trailing arguments.
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 
0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 
TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 
} 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IDENTIFIER, 349 TokenType.IS, 350 TokenType.ISNULL, 351 TokenType.INTERVAL, 352 TokenType.KEEP, 353 TokenType.KILL, 354 TokenType.LEFT, 355 TokenType.LOAD, 356 TokenType.MERGE, 357 TokenType.NATURAL, 358 TokenType.NEXT, 359 TokenType.OFFSET, 360 TokenType.OPERATOR, 361 TokenType.ORDINALITY, 362 TokenType.OVERLAPS, 363 TokenType.OVERWRITE, 364 TokenType.PARTITION, 365 TokenType.PERCENT, 366 TokenType.PIVOT, 367 TokenType.PRAGMA, 368 TokenType.RANGE, 369 TokenType.RECURSIVE, 370 TokenType.REFERENCES, 371 TokenType.REFRESH, 372 TokenType.REPLACE, 373 TokenType.RIGHT, 374 TokenType.ROW, 375 TokenType.ROWS, 376 TokenType.SEMI, 377 TokenType.SET, 378 TokenType.SETTINGS, 379 TokenType.SHOW, 380 TokenType.TEMPORARY, 381 TokenType.TOP, 382 TokenType.TRUE, 383 TokenType.TRUNCATE, 384 TokenType.UNIQUE, 385 TokenType.UNPIVOT, 386 TokenType.UPDATE, 387 TokenType.USE, 388 TokenType.VOLATILE, 389 TokenType.WINDOW, 390 *CREATABLES, 391 *SUBQUERY_PREDICATES, 392 *TYPE_TOKENS, 393 *NO_PAREN_FUNCTIONS, 394 } 395 396 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 397 
398 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 399 TokenType.ANTI, 400 TokenType.APPLY, 401 TokenType.ASOF, 402 TokenType.FULL, 403 TokenType.LEFT, 404 TokenType.LOCK, 405 TokenType.NATURAL, 406 TokenType.OFFSET, 407 TokenType.RIGHT, 408 TokenType.SEMI, 409 TokenType.WINDOW, 410 } 411 412 ALIAS_TOKENS = ID_VAR_TOKENS 413 414 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 415 416 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 417 418 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 419 420 FUNC_TOKENS = { 421 TokenType.COLLATE, 422 TokenType.COMMAND, 423 TokenType.CURRENT_DATE, 424 TokenType.CURRENT_DATETIME, 425 TokenType.CURRENT_TIMESTAMP, 426 TokenType.CURRENT_TIME, 427 TokenType.CURRENT_USER, 428 TokenType.FILTER, 429 TokenType.FIRST, 430 TokenType.FORMAT, 431 TokenType.GLOB, 432 TokenType.IDENTIFIER, 433 TokenType.INDEX, 434 TokenType.ISNULL, 435 TokenType.ILIKE, 436 TokenType.INSERT, 437 TokenType.LIKE, 438 TokenType.MERGE, 439 TokenType.OFFSET, 440 TokenType.PRIMARY_KEY, 441 TokenType.RANGE, 442 TokenType.REPLACE, 443 TokenType.RLIKE, 444 TokenType.ROW, 445 TokenType.UNNEST, 446 TokenType.VAR, 447 TokenType.LEFT, 448 TokenType.RIGHT, 449 TokenType.SEQUENCE, 450 TokenType.DATE, 451 TokenType.DATETIME, 452 TokenType.TABLE, 453 TokenType.TIMESTAMP, 454 TokenType.TIMESTAMPTZ, 455 TokenType.TRUNCATE, 456 TokenType.WINDOW, 457 TokenType.XOR, 458 *TYPE_TOKENS, 459 *SUBQUERY_PREDICATES, 460 } 461 462 CONJUNCTION = { 463 TokenType.AND: exp.And, 464 TokenType.OR: exp.Or, 465 } 466 467 EQUALITY = { 468 TokenType.COLON_EQ: exp.PropertyEQ, 469 TokenType.EQ: exp.EQ, 470 TokenType.NEQ: exp.NEQ, 471 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 472 } 473 474 COMPARISON = { 475 TokenType.GT: exp.GT, 476 TokenType.GTE: exp.GTE, 477 TokenType.LT: exp.LT, 478 TokenType.LTE: exp.LTE, 479 } 480 481 BITWISE = { 482 TokenType.AMP: exp.BitwiseAnd, 483 TokenType.CARET: exp.BitwiseXor, 484 TokenType.PIPE: exp.BitwiseOr, 485 } 486 487 TERM = { 488 TokenType.DASH: exp.Sub, 
489 TokenType.PLUS: exp.Add, 490 TokenType.MOD: exp.Mod, 491 TokenType.COLLATE: exp.Collate, 492 } 493 494 FACTOR = { 495 TokenType.DIV: exp.IntDiv, 496 TokenType.LR_ARROW: exp.Distance, 497 TokenType.SLASH: exp.Div, 498 TokenType.STAR: exp.Mul, 499 } 500 501 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 502 503 TIMES = { 504 TokenType.TIME, 505 TokenType.TIMETZ, 506 } 507 508 TIMESTAMPS = { 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 TokenType.TIMESTAMPLTZ, 512 *TIMES, 513 } 514 515 SET_OPERATIONS = { 516 TokenType.UNION, 517 TokenType.INTERSECT, 518 TokenType.EXCEPT, 519 } 520 521 JOIN_METHODS = { 522 TokenType.ASOF, 523 TokenType.NATURAL, 524 TokenType.POSITIONAL, 525 } 526 527 JOIN_SIDES = { 528 TokenType.LEFT, 529 TokenType.RIGHT, 530 TokenType.FULL, 531 } 532 533 JOIN_KINDS = { 534 TokenType.INNER, 535 TokenType.OUTER, 536 TokenType.CROSS, 537 TokenType.SEMI, 538 TokenType.ANTI, 539 } 540 541 JOIN_HINTS: t.Set[str] = set() 542 543 LAMBDAS = { 544 TokenType.ARROW: lambda self, expressions: self.expression( 545 exp.Lambda, 546 this=self._replace_lambda( 547 self._parse_conjunction(), 548 {node.name for node in expressions}, 549 ), 550 expressions=expressions, 551 ), 552 TokenType.FARROW: lambda self, expressions: self.expression( 553 exp.Kwarg, 554 this=exp.var(expressions[0].name), 555 expression=self._parse_conjunction(), 556 ), 557 } 558 559 COLUMN_OPERATORS = { 560 TokenType.DOT: None, 561 TokenType.DCOLON: lambda self, this, to: self.expression( 562 exp.Cast if self.STRICT_CAST else exp.TryCast, 563 this=this, 564 to=to, 565 ), 566 TokenType.ARROW: lambda self, this, path: self.expression( 567 exp.JSONExtract, 568 this=this, 569 expression=self.dialect.to_json_path(path), 570 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 571 ), 572 TokenType.DARROW: lambda self, this, path: self.expression( 573 exp.JSONExtractScalar, 574 this=this, 575 expression=self.dialect.to_json_path(path), 576 
only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 577 ), 578 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 579 exp.JSONBExtract, 580 this=this, 581 expression=path, 582 ), 583 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 584 exp.JSONBExtractScalar, 585 this=this, 586 expression=path, 587 ), 588 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 589 exp.JSONBContains, 590 this=this, 591 expression=key, 592 ), 593 } 594 595 EXPRESSION_PARSERS = { 596 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 597 exp.Column: lambda self: self._parse_column(), 598 exp.Condition: lambda self: self._parse_conjunction(), 599 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 600 exp.Expression: lambda self: self._parse_expression(), 601 exp.From: lambda self: self._parse_from(), 602 exp.Group: lambda self: self._parse_group(), 603 exp.Having: lambda self: self._parse_having(), 604 exp.Identifier: lambda self: self._parse_id_var(), 605 exp.Join: lambda self: self._parse_join(), 606 exp.Lambda: lambda self: self._parse_lambda(), 607 exp.Lateral: lambda self: self._parse_lateral(), 608 exp.Limit: lambda self: self._parse_limit(), 609 exp.Offset: lambda self: self._parse_offset(), 610 exp.Order: lambda self: self._parse_order(), 611 exp.Ordered: lambda self: self._parse_ordered(), 612 exp.Properties: lambda self: self._parse_properties(), 613 exp.Qualify: lambda self: self._parse_qualify(), 614 exp.Returning: lambda self: self._parse_returning(), 615 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 616 exp.Table: lambda self: self._parse_table_parts(), 617 exp.TableAlias: lambda self: self._parse_table_alias(), 618 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 619 exp.Where: lambda self: self._parse_where(), 620 exp.Window: lambda self: self._parse_named_window(), 621 exp.With: lambda self: self._parse_with(), 622 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 623 } 624 625 STATEMENT_PARSERS = { 626 TokenType.ALTER: lambda self: self._parse_alter(), 627 TokenType.BEGIN: lambda self: self._parse_transaction(), 628 TokenType.CACHE: lambda self: self._parse_cache(), 629 TokenType.COMMENT: lambda self: self._parse_comment(), 630 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 631 TokenType.CREATE: lambda self: self._parse_create(), 632 TokenType.DELETE: lambda self: self._parse_delete(), 633 TokenType.DESC: lambda self: self._parse_describe(), 634 TokenType.DESCRIBE: lambda self: self._parse_describe(), 635 TokenType.DROP: lambda self: self._parse_drop(), 636 TokenType.INSERT: lambda self: self._parse_insert(), 637 TokenType.KILL: lambda self: self._parse_kill(), 638 TokenType.LOAD: lambda self: self._parse_load(), 639 TokenType.MERGE: lambda self: self._parse_merge(), 640 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 641 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 642 TokenType.REFRESH: lambda self: self._parse_refresh(), 643 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 644 TokenType.SET: lambda self: self._parse_set(), 645 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 646 TokenType.UNCACHE: lambda self: self._parse_uncache(), 647 TokenType.UPDATE: lambda self: self._parse_update(), 648 TokenType.USE: lambda self: self.expression( 649 exp.Use, 650 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 651 this=self._parse_table(schema=False), 652 ), 653 } 654 655 UNARY_PARSERS = { 656 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 657 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 658 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 659 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 660 TokenType.PIPE_SLASH: lambda self: 
self.expression(exp.Sqrt, this=self._parse_unary()), 661 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 662 } 663 664 STRING_PARSERS = { 665 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 666 exp.RawString, this=token.text 667 ), 668 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 669 exp.National, this=token.text 670 ), 671 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 672 TokenType.STRING: lambda self, token: self.expression( 673 exp.Literal, this=token.text, is_string=True 674 ), 675 TokenType.UNICODE_STRING: lambda self, token: self.expression( 676 exp.UnicodeString, 677 this=token.text, 678 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 679 ), 680 } 681 682 NUMERIC_PARSERS = { 683 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 684 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 685 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 686 TokenType.NUMBER: lambda self, token: self.expression( 687 exp.Literal, this=token.text, is_string=False 688 ), 689 } 690 691 PRIMARY_PARSERS = { 692 **STRING_PARSERS, 693 **NUMERIC_PARSERS, 694 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 695 TokenType.NULL: lambda self, _: self.expression(exp.Null), 696 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 697 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 698 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 699 TokenType.STAR: lambda self, _: self.expression( 700 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 701 ), 702 } 703 704 PLACEHOLDER_PARSERS = { 705 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 706 TokenType.PARAMETER: lambda self: 
self._parse_parameter(), 707 TokenType.COLON: lambda self: ( 708 self.expression(exp.Placeholder, this=self._prev.text) 709 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 710 else None 711 ), 712 } 713 714 RANGE_PARSERS = { 715 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 716 TokenType.GLOB: binary_range_parser(exp.Glob), 717 TokenType.ILIKE: binary_range_parser(exp.ILike), 718 TokenType.IN: lambda self, this: self._parse_in(this), 719 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 720 TokenType.IS: lambda self, this: self._parse_is(this), 721 TokenType.LIKE: binary_range_parser(exp.Like), 722 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 723 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 724 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 725 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 726 } 727 728 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 729 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 730 "AUTO": lambda self: self._parse_auto_property(), 731 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 732 "BACKUP": lambda self: self.expression( 733 exp.BackupProperty, this=self._parse_var(any_token=True) 734 ), 735 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 736 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 738 "CHECKSUM": lambda self: self._parse_checksum(), 739 "CLUSTER BY": lambda self: self._parse_cluster(), 740 "CLUSTERED": lambda self: self._parse_clustered_by(), 741 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 742 exp.CollateProperty, **kwargs 743 ), 744 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 745 "CONTAINS": lambda self: self._parse_contains_property(), 746 "COPY": lambda self: 
self._parse_copy_property(),
        # (tail of the property-keyword dispatch table; its head is above this chunk.
        # Each entry maps an upper-cased keyword to a callback returning a Property node.)
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser callback. Looked up by `_parse_property_before`
    # style matching on the upper-cased previous token text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> -> OnUpdateColumnConstraint; bare ON <id> -> OnProperty
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # ALTER TABLE action keyword -> parser callback.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint keywords that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Function-like keywords that are parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Function name -> dedicated parser for functions whose argument syntax is
    # non-standard (e.g. CAST(x AS t), EXTRACT(part FROM expr)).
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query-modifier token -> (modifier arg name, parser). The string key is the
    # slot under which the parsed node is stored on the query expression.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        # FETCH is normalized into the same "limit" slot as LIMIT
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET <scope>/<kind> keyword -> parser callback.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Populated by dialects; empty in the base parser (see "Autofilled" tries below).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
TYPE_LITERAL_PARSERS = { 987 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 988 } 989 990 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 991 992 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 993 994 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 995 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 996 "ISOLATION": ( 997 ("LEVEL", "REPEATABLE", "READ"), 998 ("LEVEL", "READ", "COMMITTED"), 999 ("LEVEL", "READ", "UNCOMITTED"), 1000 ("LEVEL", "SERIALIZABLE"), 1001 ), 1002 "READ": ("WRITE", "ONLY"), 1003 } 1004 1005 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1006 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1007 ) 1008 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1009 1010 CREATE_SEQUENCE: OPTIONS_TYPE = { 1011 "SCALE": ("EXTEND", "NOEXTEND"), 1012 "SHARD": ("EXTEND", "NOEXTEND"), 1013 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1014 **dict.fromkeys( 1015 ( 1016 "SESSION", 1017 "GLOBAL", 1018 "KEEP", 1019 "NOKEEP", 1020 "ORDER", 1021 "NOORDER", 1022 "NOCACHE", 1023 "CYCLE", 1024 "NOCYCLE", 1025 "NOMINVALUE", 1026 "NOMAXVALUE", 1027 "NOSCALE", 1028 "NOSHARD", 1029 ), 1030 tuple(), 1031 ), 1032 } 1033 1034 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1035 1036 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1037 1038 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1039 1040 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1041 1042 CLONE_KEYWORDS = {"CLONE", "COPY"} 1043 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1044 1045 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1046 1047 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1048 1049 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1050 1051 VIEW_ATTRIBUTES = {"ENCRYPTION", 
"SCHEMABINDING", "VIEW_METADATA"} 1052 1053 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1054 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1055 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1056 1057 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1058 1059 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1060 1061 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1062 1063 DISTINCT_TOKENS = {TokenType.DISTINCT} 1064 1065 NULL_TOKENS = {TokenType.NULL} 1066 1067 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1068 1069 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1070 1071 STRICT_CAST = True 1072 1073 PREFIXED_PIVOT_COLUMNS = False 1074 IDENTIFY_PIVOT_STRINGS = False 1075 1076 LOG_DEFAULTS_TO_LN = False 1077 1078 # Whether ADD is present for each column added by ALTER TABLE 1079 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1080 1081 # Whether the table sample clause expects CSV syntax 1082 TABLESAMPLE_CSV = False 1083 1084 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1085 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1086 1087 # Whether the TRIM function expects the characters to trim as its first argument 1088 TRIM_PATTERN_FIRST = False 1089 1090 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1091 STRING_ALIASES = False 1092 1093 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1094 MODIFIERS_ATTACHED_TO_UNION = True 1095 UNION_MODIFIERS = {"order", "limit", "offset"} 1096 1097 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1098 NO_PAREN_IF_COMMANDS = True 1099 1100 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How to react to parse errors; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of SQL shown around an error.
            max_errors: Maximum number of error messages concatenated when raising.
            dialect: Dialect (name or instance) resolved via Dialect.get_or_raise.
        """
        # local import to avoid a circular import at module load time
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable per-parse state (tokens, cursor, errors)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the tokens can't be parsed into any of the given types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the token stream into semicolon-delimited chunks and applies
        `parse_method` to each chunk, collecting one tree per statement.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not start a new (empty) statement
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was only partially consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m / \033[0m underline the offending span in terminals
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # No explicit comments -> attach (and consume) any pending token comments
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches buffered comments from the previous token, then clears the buffer."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # NOTE(review): returns a truthy/falsy value, not strictly a bool, when
        # _prev/_curr are None — callers appear to use it only in boolean context
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor forward (or backward, for negative `times`) and refreshes
        the _curr/_next/_prev/_prev_comments views of the token stream."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back to an absolute index (used for backtracking)."""
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks,
and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Wraps the remainder of the statement in an opaque exp.Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure surfaces as ParseError here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <target> IS <string>; falls back to a Command
        when the target kind is not a known creatable."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses the table reference following a TO property keyword."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-expression
        actions (DELETE / RECOMPRESS / TO DISK / TO VOLUME) and the optional
        WHERE / GROUP BY ... SET aggregate tail."""
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Entry point for a single statement: dispatches on STATEMENT_PARSERS,
        then tokenizer-level commands, then falls back to expression/select parsing."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ...;
        falls back to a Command for unrecognized kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns a truthy value only when the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE] [UNIQUE] <kind> ... including properties
        at all supported locations, index lists, and CLONE/COPY clauses."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION(...) — skip the TABLE token
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause locations
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Anything left over means unsupported trailing syntax -> opaque Command
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options (INCREMENT BY, MIN/MAXVALUE, START,
        CACHE, OWNED BY, plus the keyword options in CREATE_SEQUENCE).
        Returns None when no tokens were consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that precedes the object name in CREATE.

        Collects leading modifier keywords (NO/DUAL/BEFORE/...) into kwargs and
        forwards the truthy ones to the matched PROPERTY_PARSERS callback."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser does not accept the collected modifiers
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parses a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property: a registered keyword, a DEFAULT-prefixed
        keyword, a few multi-word special cases, or a generic `key = value` pair
        (falling back to sequence properties when no '=' follows the key)."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED [AS] <format>, including Hive's INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses `<keyword> [=|AS] <field>` into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Greedily parses consecutive properties; `before` switches to the
        pre-name (Teradata-style) variant. Returns None when nothing matched."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property when directly preceded by
        CREATE/REPLACE/UNIQUE, otherwise a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parses SYSTEM_VERSIONING [= ON] [(HISTORY_TABLE = t [, DATA_CONSISTENCY_CHECK = v])]."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()
1816 1817 return prop 1818 1819 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1820 if self._match(TokenType.L_PAREN, advance=False): 1821 return self._parse_wrapped_properties() 1822 1823 if self._match_text_seq("JOURNAL"): 1824 return self._parse_withjournaltable() 1825 1826 if self._match_texts(self.VIEW_ATTRIBUTES): 1827 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1828 1829 if self._match_text_seq("DATA"): 1830 return self._parse_withdata(no=False) 1831 elif self._match_text_seq("NO", "DATA"): 1832 return self._parse_withdata(no=True) 1833 1834 if not self._next: 1835 return None 1836 1837 return self._parse_withisolatedloading() 1838 1839 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1840 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1841 self._match(TokenType.EQ) 1842 1843 user = self._parse_id_var() 1844 self._match(TokenType.PARAMETER) 1845 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1846 1847 if not user or not host: 1848 return None 1849 1850 return exp.DefinerProperty(this=f"{user}@{host}") 1851 1852 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1853 self._match(TokenType.TABLE) 1854 self._match(TokenType.EQ) 1855 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1856 1857 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1858 return self.expression(exp.LogProperty, no=no) 1859 1860 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1861 return self.expression(exp.JournalProperty, **kwargs) 1862 1863 def _parse_checksum(self) -> exp.ChecksumProperty: 1864 self._match(TokenType.EQ) 1865 1866 on = None 1867 if self._match(TokenType.ON): 1868 on = True 1869 elif self._match_text_seq("OFF"): 1870 on = False 1871 1872 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1873 1874 def _parse_cluster(self, 
wrapped: bool = False) -> exp.Cluster: 1875 return self.expression( 1876 exp.Cluster, 1877 expressions=( 1878 self._parse_wrapped_csv(self._parse_ordered) 1879 if wrapped 1880 else self._parse_csv(self._parse_ordered) 1881 ), 1882 ) 1883 1884 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1885 self._match_text_seq("BY") 1886 1887 self._match_l_paren() 1888 expressions = self._parse_csv(self._parse_column) 1889 self._match_r_paren() 1890 1891 if self._match_text_seq("SORTED", "BY"): 1892 self._match_l_paren() 1893 sorted_by = self._parse_csv(self._parse_ordered) 1894 self._match_r_paren() 1895 else: 1896 sorted_by = None 1897 1898 self._match(TokenType.INTO) 1899 buckets = self._parse_number() 1900 self._match_text_seq("BUCKETS") 1901 1902 return self.expression( 1903 exp.ClusteredByProperty, 1904 expressions=expressions, 1905 sorted_by=sorted_by, 1906 buckets=buckets, 1907 ) 1908 1909 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1910 if not self._match_text_seq("GRANTS"): 1911 self._retreat(self._index - 1) 1912 return None 1913 1914 return self.expression(exp.CopyGrantsProperty) 1915 1916 def _parse_freespace(self) -> exp.FreespaceProperty: 1917 self._match(TokenType.EQ) 1918 return self.expression( 1919 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1920 ) 1921 1922 def _parse_mergeblockratio( 1923 self, no: bool = False, default: bool = False 1924 ) -> exp.MergeBlockRatioProperty: 1925 if self._match(TokenType.EQ): 1926 return self.expression( 1927 exp.MergeBlockRatioProperty, 1928 this=self._parse_number(), 1929 percent=self._match(TokenType.PERCENT), 1930 ) 1931 1932 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1933 1934 def _parse_datablocksize( 1935 self, 1936 default: t.Optional[bool] = None, 1937 minimum: t.Optional[bool] = None, 1938 maximum: t.Optional[bool] = None, 1939 ) -> exp.DataBlocksizeProperty: 1940 self._match(TokenType.EQ) 1941 size = 
self._parse_number() 1942 1943 units = None 1944 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1945 units = self._prev.text 1946 1947 return self.expression( 1948 exp.DataBlocksizeProperty, 1949 size=size, 1950 units=units, 1951 default=default, 1952 minimum=minimum, 1953 maximum=maximum, 1954 ) 1955 1956 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1957 self._match(TokenType.EQ) 1958 always = self._match_text_seq("ALWAYS") 1959 manual = self._match_text_seq("MANUAL") 1960 never = self._match_text_seq("NEVER") 1961 default = self._match_text_seq("DEFAULT") 1962 1963 autotemp = None 1964 if self._match_text_seq("AUTOTEMP"): 1965 autotemp = self._parse_schema() 1966 1967 return self.expression( 1968 exp.BlockCompressionProperty, 1969 always=always, 1970 manual=manual, 1971 never=never, 1972 default=default, 1973 autotemp=autotemp, 1974 ) 1975 1976 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1977 index = self._index 1978 no = self._match_text_seq("NO") 1979 concurrent = self._match_text_seq("CONCURRENT") 1980 1981 if not self._match_text_seq("ISOLATED", "LOADING"): 1982 self._retreat(index) 1983 return None 1984 1985 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1986 return self.expression( 1987 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1988 ) 1989 1990 def _parse_locking(self) -> exp.LockingProperty: 1991 if self._match(TokenType.TABLE): 1992 kind = "TABLE" 1993 elif self._match(TokenType.VIEW): 1994 kind = "VIEW" 1995 elif self._match(TokenType.ROW): 1996 kind = "ROW" 1997 elif self._match_text_seq("DATABASE"): 1998 kind = "DATABASE" 1999 else: 2000 kind = None 2001 2002 if kind in ("DATABASE", "TABLE", "VIEW"): 2003 this = self._parse_table_parts() 2004 else: 2005 this = None 2006 2007 if self._match(TokenType.FOR): 2008 for_or_in = "FOR" 2009 elif self._match(TokenType.IN): 2010 for_or_in = "IN" 2011 else: 2012 for_or_in 
= None 2013 2014 if self._match_text_seq("ACCESS"): 2015 lock_type = "ACCESS" 2016 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2017 lock_type = "EXCLUSIVE" 2018 elif self._match_text_seq("SHARE"): 2019 lock_type = "SHARE" 2020 elif self._match_text_seq("READ"): 2021 lock_type = "READ" 2022 elif self._match_text_seq("WRITE"): 2023 lock_type = "WRITE" 2024 elif self._match_text_seq("CHECKSUM"): 2025 lock_type = "CHECKSUM" 2026 else: 2027 lock_type = None 2028 2029 override = self._match_text_seq("OVERRIDE") 2030 2031 return self.expression( 2032 exp.LockingProperty, 2033 this=this, 2034 kind=kind, 2035 for_or_in=for_or_in, 2036 lock_type=lock_type, 2037 override=override, 2038 ) 2039 2040 def _parse_partition_by(self) -> t.List[exp.Expression]: 2041 if self._match(TokenType.PARTITION_BY): 2042 return self._parse_csv(self._parse_conjunction) 2043 return [] 2044 2045 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2046 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2047 if self._match_text_seq("MINVALUE"): 2048 return exp.var("MINVALUE") 2049 if self._match_text_seq("MAXVALUE"): 2050 return exp.var("MAXVALUE") 2051 return self._parse_bitwise() 2052 2053 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2054 expression = None 2055 from_expressions = None 2056 to_expressions = None 2057 2058 if self._match(TokenType.IN): 2059 this = self._parse_wrapped_csv(self._parse_bitwise) 2060 elif self._match(TokenType.FROM): 2061 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2062 self._match_text_seq("TO") 2063 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2064 elif self._match_text_seq("WITH", "(", "MODULUS"): 2065 this = self._parse_number() 2066 self._match_text_seq(",", "REMAINDER") 2067 expression = self._parse_number() 2068 self._match_r_paren() 2069 else: 2070 self.raise_error("Failed to parse partition bound spec.") 2071 2072 return self.expression( 2073 
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PostgreSQL's `PARTITION OF <parent> { DEFAULT | FOR VALUES <spec> }`."""
        if not self._match_text_seq("OF"):
            # Not "PARTITION OF": give back the token consumed before dispatching here.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse a PARTITIONED BY property; accepts a schema or a bracketed field spec."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the `[AND [NO] STATISTICS]` tail of a WITH [NO] DATA property.

        `statistics` is True/False when the AND clause is present, else None.
        """
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `CONTAINS SQL` (the CONTAINS keyword was already consumed)."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `MODIFIES SQL DATA` (the MODIFIES keyword was already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse a property introduced by NO: `NO PRIMARY INDEX` or `NO SQL`."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse a property introduced by ON: ON COMMIT variants, else a generic ON target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `READS SQL DATA` (the READS keyword was already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse a DISTKEY property with a parenthesized identifier."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [{INCLUDING | EXCLUDING} <option>]*` in CREATE TABLE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # INCLUDING/EXCLUDING without an option name: treat the whole clause as invalid.
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse a [COMPOUND] SORTKEY property with parenthesized identifiers."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse a CHARACTER SET property; the value may be an identifier or string."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse `REMOTE WITH CONNECTION <table parts>` (BigQuery remote models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS property: `RETURNS TABLE<...>`, `RETURNS TABLE (...)`, or a type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> — angle-bracketed struct-style column list.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement with optional creatable kind and style keyword."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if not self._match_set(self.ID_VAR_TOKENS, advance=False):
            # What looked like a style keyword is actually the table name itself,
            # so undo it and reparse that token as part of the table.
            style = None
            self._retreat(self._index - 1)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT OVERWRITE [LOCAL] DIRECTORY (Hive)
        and `INSERT OR <alternative>` (e.g. SQLite) forms."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            # REPLACE WHERE <cond> (Databricks-style conditional overwrite)
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse `KILL [CONNECTION | QUERY] <id>`."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` (Postgres/SQLite) or `ON DUPLICATE KEY ...` (MySQL)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause, with an optional `INTO <target>` tail."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse `ROW FORMAT ...` when the ROW keyword was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive-style `ROW FORMAT SERDE ...` or `ROW FORMAT DELIMITED ...`.

        When `match_row` is True, the leading ROW FORMAT pair must be present.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only valid directly after FIELDS TERMINATED BY.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive `LOAD DATA [LOCAL] INPATH ... INTO TABLE ...`; otherwise
        fall back to treating LOAD as an opaque command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            # DELETE t1, t2 FROM ... — collect the target table list before FROM.
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may precede or follow the WHERE clause depending on dialect.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target table, SET assignments, then modifiers."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>` (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]` (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Stored as a flat [key, value] pair.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse a `PARTITION (expr, ...)` clause, or return None if absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple, or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
2461 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2462 2463 def _parse_projections(self) -> t.List[exp.Expression]: 2464 return self._parse_expressions() 2465 2466 def _parse_select( 2467 self, 2468 nested: bool = False, 2469 table: bool = False, 2470 parse_subquery_alias: bool = True, 2471 parse_set_operation: bool = True, 2472 ) -> t.Optional[exp.Expression]: 2473 cte = self._parse_with() 2474 2475 if cte: 2476 this = self._parse_statement() 2477 2478 if not this: 2479 self.raise_error("Failed to parse any statement following CTE") 2480 return cte 2481 2482 if "with" in this.arg_types: 2483 this.set("with", cte) 2484 else: 2485 self.raise_error(f"{this.key} does not support CTE") 2486 this = cte 2487 2488 return this 2489 2490 # duckdb supports leading with FROM x 2491 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2492 2493 if self._match(TokenType.SELECT): 2494 comments = self._prev_comments 2495 2496 hint = self._parse_hint() 2497 all_ = self._match(TokenType.ALL) 2498 distinct = self._match_set(self.DISTINCT_TOKENS) 2499 2500 kind = ( 2501 self._match(TokenType.ALIAS) 2502 and self._match_texts(("STRUCT", "VALUE")) 2503 and self._prev.text.upper() 2504 ) 2505 2506 if distinct: 2507 distinct = self.expression( 2508 exp.Distinct, 2509 on=self._parse_value() if self._match(TokenType.ON) else None, 2510 ) 2511 2512 if all_ and distinct: 2513 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2514 2515 limit = self._parse_limit(top=True) 2516 projections = self._parse_projections() 2517 2518 this = self.expression( 2519 exp.Select, 2520 kind=kind, 2521 hint=hint, 2522 distinct=distinct, 2523 expressions=projections, 2524 limit=limit, 2525 ) 2526 this.comments = comments 2527 2528 into = self._parse_into() 2529 if into: 2530 this.set("into", into) 2531 2532 if not from_: 2533 from_ = self._parse_from() 2534 2535 if from_: 2536 this.set("from", from_) 2537 2538 this = 
self._parse_query_modifiers(this) 2539 elif (table or nested) and self._match(TokenType.L_PAREN): 2540 if self._match(TokenType.PIVOT): 2541 this = self._parse_simplified_pivot() 2542 elif self._match(TokenType.FROM): 2543 this = exp.select("*").from_( 2544 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2545 ) 2546 else: 2547 this = ( 2548 self._parse_table() 2549 if table 2550 else self._parse_select(nested=True, parse_set_operation=False) 2551 ) 2552 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2553 2554 self._match_r_paren() 2555 2556 # We return early here so that the UNION isn't attached to the subquery by the 2557 # following call to _parse_set_operations, but instead becomes the parent node 2558 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2559 elif self._match(TokenType.VALUES, advance=False): 2560 this = self._parse_derived_table_values() 2561 elif from_: 2562 this = exp.select("*").from_(from_.this, copy=False) 2563 else: 2564 this = None 2565 2566 if parse_set_operation: 2567 return self._parse_set_operations(this) 2568 return this 2569 2570 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2571 if not skip_with_token and not self._match(TokenType.WITH): 2572 return None 2573 2574 comments = self._prev_comments 2575 recursive = self._match(TokenType.RECURSIVE) 2576 2577 expressions = [] 2578 while True: 2579 expressions.append(self._parse_cte()) 2580 2581 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2582 break 2583 else: 2584 self._match(TokenType.WITH) 2585 2586 return self.expression( 2587 exp.With, comments=comments, expressions=expressions, recursive=recursive 2588 ) 2589 2590 def _parse_cte(self) -> exp.CTE: 2591 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2592 if not alias or not alias.this: 2593 self.raise_error("Expected CTE to have alias") 2594 2595 self._match(TokenType.ALIAS) 2596 2597 if self._match_text_seq("NOT", 
"MATERIALIZED"): 2598 materialized = False 2599 elif self._match_text_seq("MATERIALIZED"): 2600 materialized = True 2601 else: 2602 materialized = None 2603 2604 return self.expression( 2605 exp.CTE, 2606 this=self._parse_wrapped(self._parse_statement), 2607 alias=alias, 2608 materialized=materialized, 2609 ) 2610 2611 def _parse_table_alias( 2612 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2613 ) -> t.Optional[exp.TableAlias]: 2614 any_token = self._match(TokenType.ALIAS) 2615 alias = ( 2616 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2617 or self._parse_string_as_identifier() 2618 ) 2619 2620 index = self._index 2621 if self._match(TokenType.L_PAREN): 2622 columns = self._parse_csv(self._parse_function_parameter) 2623 self._match_r_paren() if columns else self._retreat(index) 2624 else: 2625 columns = None 2626 2627 if not alias and not columns: 2628 return None 2629 2630 return self.expression(exp.TableAlias, this=alias, columns=columns) 2631 2632 def _parse_subquery( 2633 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2634 ) -> t.Optional[exp.Subquery]: 2635 if not this: 2636 return None 2637 2638 return self.expression( 2639 exp.Subquery, 2640 this=this, 2641 pivots=self._parse_pivots(), 2642 alias=self._parse_table_alias() if parse_alias else None, 2643 ) 2644 2645 def _implicit_unnests_to_explicit(self, this: E) -> E: 2646 from sqlglot.optimizer.normalize_identifiers import ( 2647 normalize_identifiers as _norm, 2648 ) 2649 2650 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2651 for i, join in enumerate(this.args.get("joins") or []): 2652 table = join.this 2653 normalized_table = table.copy() 2654 normalized_table.meta["maybe_column"] = True 2655 normalized_table = _norm(normalized_table, dialect=self.dialect) 2656 2657 if isinstance(table, exp.Table) and not join.args.get("on"): 2658 if normalized_table.parts[0].name in refs: 2659 
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/ORDER/LIMIT, ...)
        to `this`, dispatching through QUERY_MODIFIER_PARSERS until no more match."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT may carry an embedded OFFSET (and LIMIT ... BY
                            # expressions); hoist them onto a standalone Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block `/*+ ... */` into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep consuming comma-separated hint lists until an empty batch.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY | UNLOGGED] [TABLE] <table>` (SELECT ... INTO)."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` is used when FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item, with an optional FINAL/RUNNING window-frame prefix."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause (partitioning, ordering, measures,
        row semantics, AFTER MATCH SKIP, PATTERN, DEFINE)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            # The row-semantics variants are folded into a single var node.
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse a LATERAL / CROSS APPLY / OUTER APPLY table expression.

        `cross_apply` is tri-state: True for CROSS APPLY, False for OUTER APPLY,
        and None for a plain LATERAL (or when nothing matched).
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery followed the keyword: the operand is an UNNEST, a
            # function call, or a bare identifier (possibly dotted, below).
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW (Hive-style): alias is `<table> [AS col1, col2, ...]`.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
None 2918 2919 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2920 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2921 2922 if not skip_join_token and not join and not outer_apply and not cross_apply: 2923 return None 2924 2925 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2926 2927 if method: 2928 kwargs["method"] = method.text 2929 if side: 2930 kwargs["side"] = side.text 2931 if kind: 2932 kwargs["kind"] = kind.text 2933 if hint: 2934 kwargs["hint"] = hint 2935 2936 if self._match(TokenType.MATCH_CONDITION): 2937 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2938 2939 if self._match(TokenType.ON): 2940 kwargs["on"] = self._parse_conjunction() 2941 elif self._match(TokenType.USING): 2942 kwargs["using"] = self._parse_wrapped_id_vars() 2943 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2944 kind and kind.token_type == TokenType.CROSS 2945 ): 2946 index = self._index 2947 joins: t.Optional[list] = list(self._parse_joins()) 2948 2949 if joins and self._match(TokenType.ON): 2950 kwargs["on"] = self._parse_conjunction() 2951 elif joins and self._match(TokenType.USING): 2952 kwargs["using"] = self._parse_wrapped_id_vars() 2953 else: 2954 joins = None 2955 self._retreat(index) 2956 2957 kwargs["this"].set("joins", joins if joins else None) 2958 2959 comments = [c for token in (method, side, kind) if token for c in token.comments] 2960 return self.expression(exp.Join, comments=comments, **kwargs) 2961 2962 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2963 this = self._parse_conjunction() 2964 2965 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2966 return this 2967 2968 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2969 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2970 2971 return this 2972 2973 def _parse_index_params(self) -> exp.IndexParameters: 2974 
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        If `index` is given, the index name was already parsed by the caller and
        we expect `ON [TABLE] <table>` next; otherwise we parse the
        `[UNIQUE] [PRIMARY] [AMP] INDEX <name>` prefix ourselves.

        Returns None when no INDEX keyword is found in the latter mode.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        # USING/columns/INCLUDE/partitioning/WHERE etc. are shared between both modes.
        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
3043 exp.WithTableHint, 3044 expressions=self._parse_csv( 3045 lambda: self._parse_function() or self._parse_var(any_token=True) 3046 ), 3047 ) 3048 ) 3049 self._match_r_paren() 3050 else: 3051 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3052 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3053 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3054 3055 self._match_texts(("INDEX", "KEY")) 3056 if self._match(TokenType.FOR): 3057 hint.set("target", self._advance_any() and self._prev.text.upper()) 3058 3059 hint.set("expressions", self._parse_wrapped_id_vars()) 3060 hints.append(hint) 3061 3062 return hints or None 3063 3064 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3065 return ( 3066 (not schema and self._parse_function(optional_parens=False)) 3067 or self._parse_id_var(any_token=False) 3068 or self._parse_string_as_identifier() 3069 or self._parse_placeholder() 3070 ) 3071 3072 def _parse_table_parts( 3073 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3074 ) -> exp.Table: 3075 catalog = None 3076 db = None 3077 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3078 3079 while self._match(TokenType.DOT): 3080 if catalog: 3081 # This allows nesting the table in arbitrarily many dot expressions if needed 3082 table = self.expression( 3083 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3084 ) 3085 else: 3086 catalog = db 3087 db = table 3088 # "" used for tsql FROM a..b case 3089 table = self._parse_table_part(schema=schema) or "" 3090 3091 if ( 3092 wildcard 3093 and self._is_connected() 3094 and (isinstance(table, exp.Identifier) or not table) 3095 and self._match(TokenType.STAR) 3096 ): 3097 if isinstance(table, exp.Identifier): 3098 table.args["this"] += "*" 3099 else: 3100 table = exp.Identifier(this="*") 3101 3102 if is_db_reference: 3103 catalog = db 3104 db = table 3105 table = None 3106 3107 if not table and 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: LATERAL, UNNEST, VALUES, a subquery, or a
        (possibly bracketed) table name, plus its trailing decorations
        (version, alias, hints, pivots, sample, joins, ordinality).

        The first four alternatives are tried in order and returned directly.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        # Optional leading bracket expression (only when the caller allows it).
        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            # In schema context the table is followed by a column definition list.
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after;
        # exactly one of these two branches runs, so table_sample is always bound.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node and becomes the new root.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
= self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3240 3241 alias = self._parse_table_alias() if with_alias else None 3242 3243 if alias: 3244 if self.dialect.UNNEST_COLUMN_ONLY: 3245 if alias.args.get("columns"): 3246 self.raise_error("Unexpected extra column alias in unnest.") 3247 3248 alias.set("columns", [alias.this]) 3249 alias.set("this", None) 3250 3251 columns = alias.args.get("columns") or [] 3252 if offset and len(expressions) < len(columns): 3253 offset = columns.pop() 3254 3255 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3256 self._match(TokenType.ALIAS) 3257 offset = self._parse_id_var( 3258 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3259 ) or exp.to_identifier("offset") 3260 3261 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3262 3263 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3264 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3265 if not is_derived and not self._match_text_seq("VALUES"): 3266 return None 3267 3268 expressions = self._parse_csv(self._parse_value) 3269 alias = self._parse_table_alias() 3270 3271 if is_derived: 3272 self._match_r_paren() 3273 3274 return self.expression( 3275 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3276 ) 3277 3278 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3279 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3280 as_modifier and self._match_text_seq("USING", "SAMPLE") 3281 ): 3282 return None 3283 3284 bucket_numerator = None 3285 bucket_denominator = None 3286 bucket_field = None 3287 percent = None 3288 size = None 3289 seed = None 3290 3291 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3292 matched_l_paren = self._match(TokenType.L_PAREN) 3293 3294 if self.TABLESAMPLE_CSV: 3295 num = None 3296 expressions = self._parse_csv(self._parse_primary) 3297 else: 3298 expressions = None 3299 
num = ( 3300 self._parse_factor() 3301 if self._match(TokenType.NUMBER, advance=False) 3302 else self._parse_primary() or self._parse_placeholder() 3303 ) 3304 3305 if self._match_text_seq("BUCKET"): 3306 bucket_numerator = self._parse_number() 3307 self._match_text_seq("OUT", "OF") 3308 bucket_denominator = bucket_denominator = self._parse_number() 3309 self._match(TokenType.ON) 3310 bucket_field = self._parse_field() 3311 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3312 percent = num 3313 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3314 size = num 3315 else: 3316 percent = num 3317 3318 if matched_l_paren: 3319 self._match_r_paren() 3320 3321 if self._match(TokenType.L_PAREN): 3322 method = self._parse_var(upper=True) 3323 seed = self._match(TokenType.COMMA) and self._parse_number() 3324 self._match_r_paren() 3325 elif self._match_texts(("SEED", "REPEATABLE")): 3326 seed = self._parse_wrapped(self._parse_number) 3327 3328 return self.expression( 3329 exp.TableSample, 3330 expressions=expressions, 3331 method=method, 3332 bucket_numerator=bucket_numerator, 3333 bucket_denominator=bucket_denominator, 3334 bucket_field=bucket_field, 3335 percent=percent, 3336 size=size, 3337 seed=seed, 3338 ) 3339 3340 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3341 return list(iter(self._parse_pivot, None)) or None 3342 3343 def _parse_joins(self) -> t.Iterator[exp.Join]: 3344 return iter(self._parse_join, None) 3345 3346 # https://duckdb.org/docs/sql/statements/pivot 3347 def _parse_simplified_pivot(self) -> exp.Pivot: 3348 def _parse_on() -> t.Optional[exp.Expression]: 3349 this = self._parse_bitwise() 3350 return self._parse_in(this) if self._match(TokenType.IN) else this 3351 3352 this = self._parse_table() 3353 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3354 using = self._match(TokenType.USING) and self._parse_csv( 3355 lambda: self._parse_alias(self._parse_function()) 3356 ) 3357 
group = self._parse_group() 3358 return self.expression( 3359 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3360 ) 3361 3362 def _parse_pivot_in(self) -> exp.In: 3363 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3364 this = self._parse_conjunction() 3365 3366 self._match(TokenType.ALIAS) 3367 alias = self._parse_field() 3368 if alias: 3369 return self.expression(exp.PivotAlias, this=this, alias=alias) 3370 3371 return this 3372 3373 value = self._parse_column() 3374 3375 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3376 self.raise_error("Expecting IN (") 3377 3378 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3379 3380 self._match_r_paren() 3381 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3382 3383 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3384 index = self._index 3385 include_nulls = None 3386 3387 if self._match(TokenType.PIVOT): 3388 unpivot = False 3389 elif self._match(TokenType.UNPIVOT): 3390 unpivot = True 3391 3392 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3393 if self._match_text_seq("INCLUDE", "NULLS"): 3394 include_nulls = True 3395 elif self._match_text_seq("EXCLUDE", "NULLS"): 3396 include_nulls = False 3397 else: 3398 return None 3399 3400 expressions = [] 3401 3402 if not self._match(TokenType.L_PAREN): 3403 self._retreat(index) 3404 return None 3405 3406 if unpivot: 3407 expressions = self._parse_csv(self._parse_column) 3408 else: 3409 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3410 3411 if not expressions: 3412 self.raise_error("Failed to parse PIVOT's aggregation list") 3413 3414 if not self._match(TokenType.FOR): 3415 self.raise_error("Expecting FOR") 3416 3417 field = self._parse_pivot_in() 3418 3419 self._match_r_paren() 3420 3421 pivot = self.expression( 3422 exp.Pivot, 3423 expressions=expressions, 3424 field=field, 3425 
unpivot=unpivot, 3426 include_nulls=include_nulls, 3427 ) 3428 3429 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3430 pivot.set("alias", self._parse_table_alias()) 3431 3432 if not unpivot: 3433 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3434 3435 columns: t.List[exp.Expression] = [] 3436 for fld in pivot.args["field"].expressions: 3437 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3438 for name in names: 3439 if self.PREFIXED_PIVOT_COLUMNS: 3440 name = f"{name}_{field_name}" if name else field_name 3441 else: 3442 name = f"{field_name}_{name}" if name else field_name 3443 3444 columns.append(exp.to_identifier(name)) 3445 3446 pivot.set("columns", columns) 3447 3448 return pivot 3449 3450 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3451 return [agg.alias for agg in aggregations] 3452 3453 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3454 if not skip_where_token and not self._match(TokenType.PREWHERE): 3455 return None 3456 3457 return self.expression( 3458 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3459 ) 3460 3461 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3462 if not skip_where_token and not self._match(TokenType.WHERE): 3463 return None 3464 3465 return self.expression( 3466 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3467 ) 3468 3469 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3470 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3471 return None 3472 3473 elements: t.Dict[str, t.Any] = defaultdict(list) 3474 3475 if self._match(TokenType.ALL): 3476 elements["all"] = True 3477 elif self._match(TokenType.DISTINCT): 3478 elements["all"] = False 3479 3480 while True: 3481 expressions = self._parse_csv(self._parse_conjunction) 3482 if 
expressions: 3483 elements["expressions"].extend(expressions) 3484 3485 grouping_sets = self._parse_grouping_sets() 3486 if grouping_sets: 3487 elements["grouping_sets"].extend(grouping_sets) 3488 3489 rollup = None 3490 cube = None 3491 totals = None 3492 3493 index = self._index 3494 with_ = self._match(TokenType.WITH) 3495 if self._match(TokenType.ROLLUP): 3496 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3497 elements["rollup"].extend(ensure_list(rollup)) 3498 3499 if self._match(TokenType.CUBE): 3500 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3501 elements["cube"].extend(ensure_list(cube)) 3502 3503 if self._match_text_seq("TOTALS"): 3504 totals = True 3505 elements["totals"] = True # type: ignore 3506 3507 if not (grouping_sets or rollup or cube or totals): 3508 if with_: 3509 self._retreat(index) 3510 break 3511 3512 return self.expression(exp.Group, **elements) # type: ignore 3513 3514 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3515 if not self._match(TokenType.GROUPING_SETS): 3516 return None 3517 3518 return self._parse_wrapped_csv(self._parse_grouping_set) 3519 3520 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3521 if self._match(TokenType.L_PAREN): 3522 grouping_set = self._parse_csv(self._parse_column) 3523 self._match_r_paren() 3524 return self.expression(exp.Tuple, expressions=grouping_set) 3525 3526 return self._parse_column() 3527 3528 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3529 if not skip_having_token and not self._match(TokenType.HAVING): 3530 return None 3531 return self.expression(exp.Having, this=self._parse_conjunction()) 3532 3533 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3534 if not self._match(TokenType.QUALIFY): 3535 return None 3536 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3537 3538 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3539 if 
skip_start_token: 3540 start = None 3541 elif self._match(TokenType.START_WITH): 3542 start = self._parse_conjunction() 3543 else: 3544 return None 3545 3546 self._match(TokenType.CONNECT_BY) 3547 nocycle = self._match_text_seq("NOCYCLE") 3548 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3549 exp.Prior, this=self._parse_bitwise() 3550 ) 3551 connect = self._parse_conjunction() 3552 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3553 3554 if not start and self._match(TokenType.START_WITH): 3555 start = self._parse_conjunction() 3556 3557 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3558 3559 def _parse_name_as_expression(self) -> exp.Alias: 3560 return self.expression( 3561 exp.Alias, 3562 alias=self._parse_id_var(any_token=True), 3563 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3564 ) 3565 3566 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3567 if self._match_text_seq("INTERPOLATE"): 3568 return self._parse_wrapped_csv(self._parse_name_as_expression) 3569 return None 3570 3571 def _parse_order( 3572 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3573 ) -> t.Optional[exp.Expression]: 3574 siblings = None 3575 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3576 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3577 return this 3578 3579 siblings = True 3580 3581 return self.expression( 3582 exp.Order, 3583 this=this, 3584 expressions=self._parse_csv(self._parse_ordered), 3585 interpolate=self._parse_interpolate(), 3586 siblings=siblings, 3587 ) 3588 3589 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3590 if not self._match(token): 3591 return None 3592 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3593 3594 def _parse_ordered( 3595 self, parse_method: t.Optional[t.Callable] = None 3596 ) -> t.Optional[exp.Ordered]: 3597 this = parse_method() if 
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top=True`) clause, or a FETCH clause.

        Returns `this` unchanged if neither clause is present. A comma after
        the first LIMIT term means it was actually an offset (e.g. the
        `LIMIT <offset>, <count>` form).
        """
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize its expression: TOP (expr) vs TOP n.
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # Two-term form: the first term is the offset, the second the count.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive per the FETCH grammar.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Left-fold any number of trailing UNION / EXCEPT / INTERSECT operators
        onto `this`, then (for some dialects) hoist query modifiers from the
        rightmost operand up to the set-operation node itself.
        """
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # Absent an explicit ALL, the operation defaults to DISTINCT.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Move modifiers (e.g. ORDER BY / LIMIT) from the right operand
                # onto the union so they apply to the whole set operation.
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)

                    if expr:
                        this.set(arg, expr.pop())

        return this
self._parse_bitwise() 3789 negate = self._match(TokenType.NOT) 3790 3791 if self._match_set(self.RANGE_PARSERS): 3792 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3793 if not expression: 3794 return this 3795 3796 this = expression 3797 elif self._match(TokenType.ISNULL): 3798 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3799 3800 # Postgres supports ISNULL and NOTNULL for conditions. 3801 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3802 if self._match(TokenType.NOTNULL): 3803 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3804 this = self.expression(exp.Not, this=this) 3805 3806 if negate: 3807 this = self.expression(exp.Not, this=this) 3808 3809 if self._match(TokenType.IS): 3810 this = self._parse_is(this) 3811 3812 return this 3813 3814 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3815 index = self._index - 1 3816 negate = self._match(TokenType.NOT) 3817 3818 if self._match_text_seq("DISTINCT", "FROM"): 3819 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3820 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3821 3822 expression = self._parse_null() or self._parse_boolean() 3823 if not expression: 3824 self._retreat(index) 3825 return None 3826 3827 this = self.expression(exp.Is, this=this, expression=expression) 3828 return self.expression(exp.Not, this=this) if negate else this 3829 3830 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3831 unnest = self._parse_unnest(with_alias=False) 3832 if unnest: 3833 this = self.expression(exp.In, this=this, unnest=unnest) 3834 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3835 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3836 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3837 3838 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3839 this = 
self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3840 else: 3841 this = self.expression(exp.In, this=this, expressions=expressions) 3842 3843 if matched_l_paren: 3844 self._match_r_paren(this) 3845 elif not self._match(TokenType.R_BRACKET, expression=this): 3846 self.raise_error("Expecting ]") 3847 else: 3848 this = self.expression(exp.In, this=this, field=self._parse_field()) 3849 3850 return this 3851 3852 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3853 low = self._parse_bitwise() 3854 self._match(TokenType.AND) 3855 high = self._parse_bitwise() 3856 return self.expression(exp.Between, this=this, low=low, high=high) 3857 3858 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3859 if not self._match(TokenType.ESCAPE): 3860 return this 3861 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3862 3863 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3864 index = self._index 3865 3866 if not self._match(TokenType.INTERVAL) and match_interval: 3867 return None 3868 3869 if self._match(TokenType.STRING, advance=False): 3870 this = self._parse_primary() 3871 else: 3872 this = self._parse_term() 3873 3874 if not this or ( 3875 isinstance(this, exp.Column) 3876 and not this.table 3877 and not this.this.quoted 3878 and this.name.upper() == "IS" 3879 ): 3880 self._retreat(index) 3881 return None 3882 3883 unit = self._parse_function() or ( 3884 not self._match(TokenType.ALIAS, advance=False) 3885 and self._parse_var(any_token=True, upper=True) 3886 ) 3887 3888 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3889 # each INTERVAL expression into this canonical form so it's easy to transpile 3890 if this and this.is_number: 3891 this = exp.Literal.string(this.name) 3892 elif this and this.is_string: 3893 parts = this.name.split() 3894 3895 if len(parts) == 2: 3896 if unit: 3897 # This 
is not actually a unit, it's something else (e.g. a "window side") 3898 unit = None 3899 self._retreat(self._index - 1) 3900 3901 this = exp.Literal.string(parts[0]) 3902 unit = self.expression(exp.Var, this=parts[1].upper()) 3903 3904 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3905 unit = self.expression( 3906 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3907 ) 3908 3909 return self.expression(exp.Interval, this=this, unit=unit) 3910 3911 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3912 this = self._parse_term() 3913 3914 while True: 3915 if self._match_set(self.BITWISE): 3916 this = self.expression( 3917 self.BITWISE[self._prev.token_type], 3918 this=this, 3919 expression=self._parse_term(), 3920 ) 3921 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3922 this = self.expression( 3923 exp.DPipe, 3924 this=this, 3925 expression=self._parse_term(), 3926 safe=not self.dialect.STRICT_STRING_CONCAT, 3927 ) 3928 elif self._match(TokenType.DQMARK): 3929 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3930 elif self._match_pair(TokenType.LT, TokenType.LT): 3931 this = self.expression( 3932 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3933 ) 3934 elif self._match_pair(TokenType.GT, TokenType.GT): 3935 this = self.expression( 3936 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3937 ) 3938 else: 3939 break 3940 3941 return this 3942 3943 def _parse_term(self) -> t.Optional[exp.Expression]: 3944 return self._parse_tokens(self._parse_factor, self.TERM) 3945 3946 def _parse_factor(self) -> t.Optional[exp.Expression]: 3947 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3948 this = parse_method() 3949 3950 while self._match_set(self.FACTOR): 3951 this = self.expression( 3952 self.FACTOR[self._prev.token_type], 3953 this=this, 3954 comments=self._prev_comments, 3955 expression=parse_method(), 3956 
    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators (only for dialects with EXPONENT tokens)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary-prefixed expression, else fall through to a typed primary."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval sum, a cast-like `<type> <literal>` construct, or a column.

        A leading data type followed by a literal becomes a Cast (or a
        dialect-specific literal parser result); a type with no expressions is
        re-parsed as a plain column.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no args: treat it as an identifier/column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parenthesized data-type parameter, e.g. the 10 in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            # A bare identifier here is a type-param keyword (e.g. MAX) — store as a var.
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType.

        check_func=True makes `TYPE(...)` ambiguous with a function call: the
        parse is only kept if a string literal follows (otherwise we backtrack).
        allow_identifiers permits resolving a plain identifier to a type (or a
        user-defined type when the dialect supports them).
        """
        index = self._index

        # Teradata-style SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    # Re-tokenizing the identifier yielded a type token — adopt it.
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. AggregateFunction(avg, Float64): first arg is a function
                # or identifier, the rest are types.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(args) could still be a function call — resolved further below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket style nesting, e.g. ARRAY<INT>, STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # Disambiguate TYPE(args): keep the type parse only if a string follows.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing []s turn the type into (nested) arrays, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member (`name [:] type [constraints]`); with
        type_required, backtrack and re-parse as a bare type if no type was found."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # _parse_column_def added nothing, so there was no type — retry as a type.
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE <zone> when that clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a single field and wrap a bare Identifier into a Column node.

        Also lets VALUES be used as an identifier when the dialect requires
        VALUES-the-keyword to be followed by parentheses and none follow.
        """
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (::, ., JSON arrows, brackets, ...) to `this`.

        Dotted parts are re-shuffled into Column(catalog/db/table/this) when
        possible, otherwise folded into exp.Dot chains.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift existing parts one level up: table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, adjacent-string concat, `.N`
        number, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> 'ab'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier — in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, unwrapping the ODBC `{fn <function>}` escape if present."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly paren-less) function call into a typed Func or Anonymous node.

        Resolution order: no-paren special parsers, no-paren builtin functions,
        dialect FUNCTION_PARSERS, subquery predicates, then the FUNCTIONS
        registry (falling back to exp.Anonymous).
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original spelling of the function name.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Keys are identifiers, not columns — unwrap.
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (`name [type] [constraints]`)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF reference: dotted name, plus a wrapped parameter list if present."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); plain Identifier if no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`(a, b) -> ...` / `a -> ...`), a DISTINCT arg list, or a
        plain select/expression argument with aggregate-style modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into exp.Schema; returns
        `this` unchanged if the parens actually start a query."""
        index = self._index

        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single schema field definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `<name> [type] [computed-expr] [constraints...]` into exp.ColumnDef.

        Returns `this` untouched when neither a type nor constraints follow, so
        callers can tell a bare identifier apart from a real column def.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. `x AS (a + b)` / ClickHouse ALIAS/MATERIALIZED.
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments (which yield an identity constraint)."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; backtracks if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse COMPRESS, with either a wrapped value list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | <expr>}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # e.g. GENERATED ALWAYS AS ROW START/END [HIDDEN] (system versioning).
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>)
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric args: IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <value> (Teradata-style inline length constraint)."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the trailer of a NOT constraint: NULL / CASESPECIFIC / FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint, else fall back to an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the CONSTRAINT_PARSERS entry for the next keyword, if any."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint options (ON DELETE/UPDATE actions, DEFERRABLE, ...)
        as plain strings, stopping at the first unrecognized token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE, UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table> plus any key-constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word action (e.g. CASCADE, RESTRICT).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); backtracks if not present."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint (no paren list) or a
        table-level key with a wrapped column list plus options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one bracketed element: aliased expression, possibly sliced (a:b)."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a trailing [..] / {..} into Bracket, Array, or Struct, recursively
        (to handle chained subscripts like x[0][1])."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `this : <expr>` into exp.Slice when a colon follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END into exp.Case."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # `ELSE interval END` mis-parsed as INTERVAL 'END' — undo that.
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true, false) or keyword-form IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-leading IF is a command in some dialects.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; backtracks otherwise."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )
self._match(TokenType.FROM): 4931 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4932 4933 if not self._match(TokenType.COMMA): 4934 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4935 4936 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4937 4938 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4939 this = self._parse_conjunction() 4940 4941 if not self._match(TokenType.ALIAS): 4942 if self._match(TokenType.COMMA): 4943 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4944 4945 self.raise_error("Expected AS after CAST") 4946 4947 fmt = None 4948 to = self._parse_types() 4949 4950 if self._match(TokenType.FORMAT): 4951 fmt_string = self._parse_string() 4952 fmt = self._parse_at_time_zone(fmt_string) 4953 4954 if not to: 4955 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4956 if to.this in exp.DataType.TEMPORAL_TYPES: 4957 this = self.expression( 4958 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4959 this=this, 4960 format=exp.Literal.string( 4961 format_time( 4962 fmt_string.this if fmt_string else "", 4963 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4964 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4965 ) 4966 ), 4967 ) 4968 4969 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4970 this.set("zone", fmt.args["zone"]) 4971 return this 4972 elif not to: 4973 self.raise_error("Expected TYPE after CAST") 4974 elif isinstance(to, exp.Identifier): 4975 to = exp.DataType.build(to.name, udt=True) 4976 elif to.this == exp.DataType.Type.CHAR: 4977 if self._match(TokenType.CHARACTER_SET): 4978 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4979 4980 return self.expression( 4981 exp.Cast if strict else exp.TryCast, 4982 this=this, 4983 to=to, 4984 format=fmt, 4985 safe=safe, 4986 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4987 ) 4988 4989 def _parse_string_agg(self) -> exp.Expression: 4990 if self._match(TokenType.DISTINCT): 4991 args: t.List[t.Optional[exp.Expression]] = [ 4992 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4993 ] 4994 if self._match(TokenType.COMMA): 4995 args.extend(self._parse_csv(self._parse_conjunction)) 4996 else: 4997 args = self._parse_csv(self._parse_conjunction) # type: ignore 4998 4999 index = self._index 5000 if not self._match(TokenType.R_PAREN) and args: 5001 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5002 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5003 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5004 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5005 5006 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5007 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5008 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
5009 if not self._match_text_seq("WITHIN", "GROUP"): 5010 self._retreat(index) 5011 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5012 5013 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5014 order = self._parse_order(this=seq_get(args, 0)) 5015 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5016 5017 def _parse_convert( 5018 self, strict: bool, safe: t.Optional[bool] = None 5019 ) -> t.Optional[exp.Expression]: 5020 this = self._parse_bitwise() 5021 5022 if self._match(TokenType.USING): 5023 to: t.Optional[exp.Expression] = self.expression( 5024 exp.CharacterSet, this=self._parse_var() 5025 ) 5026 elif self._match(TokenType.COMMA): 5027 to = self._parse_types() 5028 else: 5029 to = None 5030 5031 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5032 5033 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5034 """ 5035 There are generally two variants of the DECODE function: 5036 5037 - DECODE(bin, charset) 5038 - DECODE(expression, search, result [, search, result] ... [, default]) 5039 5040 The second variant will always be parsed into a CASE expression. Note that NULL 5041 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5042 instead of relying on pattern matching. 
5043 """ 5044 args = self._parse_csv(self._parse_conjunction) 5045 5046 if len(args) < 3: 5047 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5048 5049 expression, *expressions = args 5050 if not expression: 5051 return None 5052 5053 ifs = [] 5054 for search, result in zip(expressions[::2], expressions[1::2]): 5055 if not search or not result: 5056 return None 5057 5058 if isinstance(search, exp.Literal): 5059 ifs.append( 5060 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5061 ) 5062 elif isinstance(search, exp.Null): 5063 ifs.append( 5064 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5065 ) 5066 else: 5067 cond = exp.or_( 5068 exp.EQ(this=expression.copy(), expression=search), 5069 exp.and_( 5070 exp.Is(this=expression.copy(), expression=exp.Null()), 5071 exp.Is(this=search.copy(), expression=exp.Null()), 5072 copy=False, 5073 ), 5074 copy=False, 5075 ) 5076 ifs.append(exp.If(this=cond, true=result)) 5077 5078 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5079 5080 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5081 self._match_text_seq("KEY") 5082 key = self._parse_column() 5083 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5084 self._match_text_seq("VALUE") 5085 value = self._parse_bitwise() 5086 5087 if not key and not value: 5088 return None 5089 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5090 5091 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5092 if not this or not self._match_text_seq("FORMAT", "JSON"): 5093 return this 5094 5095 return self.expression(exp.FormatJson, this=this) 5096 5097 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5098 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 5099 for value in values: 5100 if self._match_text_seq(value, "ON", on): 5101 return f"{value} ON {on}" 5102 5103 return None 5104 5105 @t.overload 5106 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5107 5108 @t.overload 5109 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5110 5111 def _parse_json_object(self, agg=False): 5112 star = self._parse_star() 5113 expressions = ( 5114 [star] 5115 if star 5116 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5117 ) 5118 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5119 5120 unique_keys = None 5121 if self._match_text_seq("WITH", "UNIQUE"): 5122 unique_keys = True 5123 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5124 unique_keys = False 5125 5126 self._match_text_seq("KEYS") 5127 5128 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5129 self._parse_type() 5130 ) 5131 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5132 5133 return self.expression( 5134 exp.JSONObjectAgg if agg else exp.JSONObject, 5135 expressions=expressions, 5136 null_handling=null_handling, 5137 unique_keys=unique_keys, 5138 return_type=return_type, 5139 encoding=encoding, 5140 ) 5141 5142 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5143 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5144 if not self._match_text_seq("NESTED"): 5145 this = self._parse_id_var() 5146 kind = self._parse_types(allow_identifiers=False) 5147 nested = None 5148 else: 5149 this = None 5150 kind = None 5151 nested = True 5152 5153 path = self._match_text_seq("PATH") and self._parse_string() 5154 nested_schema = nested and self._parse_json_schema() 5155 5156 return self.expression( 5157 exp.JSONColumnDef, 5158 this=this, 5159 kind=kind, 5160 path=path, 5161 nested_schema=nested_schema, 5162 ) 5163 5164 def _parse_json_schema(self) -> exp.JSONSchema: 
5165 self._match_text_seq("COLUMNS") 5166 return self.expression( 5167 exp.JSONSchema, 5168 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5169 ) 5170 5171 def _parse_json_table(self) -> exp.JSONTable: 5172 this = self._parse_format_json(self._parse_bitwise()) 5173 path = self._match(TokenType.COMMA) and self._parse_string() 5174 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5175 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5176 schema = self._parse_json_schema() 5177 5178 return exp.JSONTable( 5179 this=this, 5180 schema=schema, 5181 path=path, 5182 error_handling=error_handling, 5183 empty_handling=empty_handling, 5184 ) 5185 5186 def _parse_match_against(self) -> exp.MatchAgainst: 5187 expressions = self._parse_csv(self._parse_column) 5188 5189 self._match_text_seq(")", "AGAINST", "(") 5190 5191 this = self._parse_string() 5192 5193 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5194 modifier = "IN NATURAL LANGUAGE MODE" 5195 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5196 modifier = f"{modifier} WITH QUERY EXPANSION" 5197 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5198 modifier = "IN BOOLEAN MODE" 5199 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5200 modifier = "WITH QUERY EXPANSION" 5201 else: 5202 modifier = None 5203 5204 return self.expression( 5205 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5206 ) 5207 5208 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5209 def _parse_open_json(self) -> exp.OpenJSON: 5210 this = self._parse_bitwise() 5211 path = self._match(TokenType.COMMA) and self._parse_string() 5212 5213 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5214 this = self._parse_field(any_token=True) 5215 kind = self._parse_types() 5216 path = self._parse_string() 5217 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5218 5219 
return self.expression( 5220 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5221 ) 5222 5223 expressions = None 5224 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5225 self._match_l_paren() 5226 expressions = self._parse_csv(_parse_open_json_column_def) 5227 5228 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5229 5230 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5231 args = self._parse_csv(self._parse_bitwise) 5232 5233 if self._match(TokenType.IN): 5234 return self.expression( 5235 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5236 ) 5237 5238 if haystack_first: 5239 haystack = seq_get(args, 0) 5240 needle = seq_get(args, 1) 5241 else: 5242 needle = seq_get(args, 0) 5243 haystack = seq_get(args, 1) 5244 5245 return self.expression( 5246 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5247 ) 5248 5249 def _parse_predict(self) -> exp.Predict: 5250 self._match_text_seq("MODEL") 5251 this = self._parse_table() 5252 5253 self._match(TokenType.COMMA) 5254 self._match_text_seq("TABLE") 5255 5256 return self.expression( 5257 exp.Predict, 5258 this=this, 5259 expression=self._parse_table(), 5260 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5261 ) 5262 5263 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5264 args = self._parse_csv(self._parse_table) 5265 return exp.JoinHint(this=func_name.upper(), expressions=args) 5266 5267 def _parse_substring(self) -> exp.Substring: 5268 # Postgres supports the form: substring(string [from int] [for int]) 5269 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5270 5271 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5272 5273 if self._match(TokenType.FROM): 5274 args.append(self._parse_bitwise()) 5275 if self._match(TokenType.FOR): 5276 args.append(self._parse_bitwise()) 5277 5278 return 
self.validate_expression(exp.Substring.from_arg_list(args), args) 5279 5280 def _parse_trim(self) -> exp.Trim: 5281 # https://www.w3resource.com/sql/character-functions/trim.php 5282 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5283 5284 position = None 5285 collation = None 5286 expression = None 5287 5288 if self._match_texts(self.TRIM_TYPES): 5289 position = self._prev.text.upper() 5290 5291 this = self._parse_bitwise() 5292 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5293 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5294 expression = self._parse_bitwise() 5295 5296 if invert_order: 5297 this, expression = expression, this 5298 5299 if self._match(TokenType.COLLATE): 5300 collation = self._parse_bitwise() 5301 5302 return self.expression( 5303 exp.Trim, this=this, position=position, expression=expression, collation=collation 5304 ) 5305 5306 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5307 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5308 5309 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5310 return self._parse_window(self._parse_id_var(), alias=True) 5311 5312 def _parse_respect_or_ignore_nulls( 5313 self, this: t.Optional[exp.Expression] 5314 ) -> t.Optional[exp.Expression]: 5315 if self._match_text_seq("IGNORE", "NULLS"): 5316 return self.expression(exp.IgnoreNulls, this=this) 5317 if self._match_text_seq("RESPECT", "NULLS"): 5318 return self.expression(exp.RespectNulls, this=this) 5319 return this 5320 5321 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5322 if self._match(TokenType.HAVING): 5323 self._match_texts(("MAX", "MIN")) 5324 max = self._prev.text.upper() != "MIN" 5325 return self.expression( 5326 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5327 ) 5328 5329 return this 5330 5331 def _parse_window( 5332 self, this: 
t.Optional[exp.Expression], alias: bool = False 5333 ) -> t.Optional[exp.Expression]: 5334 func = this 5335 comments = func.comments if isinstance(func, exp.Expression) else None 5336 5337 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5338 self._match(TokenType.WHERE) 5339 this = self.expression( 5340 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5341 ) 5342 self._match_r_paren() 5343 5344 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5345 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5346 if self._match_text_seq("WITHIN", "GROUP"): 5347 order = self._parse_wrapped(self._parse_order) 5348 this = self.expression(exp.WithinGroup, this=this, expression=order) 5349 5350 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5351 # Some dialects choose to implement and some do not. 5352 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5353 5354 # There is some code above in _parse_lambda that handles 5355 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5356 5357 # The below changes handle 5358 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5359 5360 # Oracle allows both formats 5361 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5362 # and Snowflake chose to do the same for familiarity 5363 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5364 if isinstance(this, exp.AggFunc): 5365 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5366 5367 if ignore_respect and ignore_respect is not this: 5368 ignore_respect.replace(ignore_respect.this) 5369 this = self.expression(ignore_respect.__class__, this=this) 5370 5371 this = self._parse_respect_or_ignore_nulls(this) 5372 5373 # bigquery select from window x AS (partition by ...) 
5374 if alias: 5375 over = None 5376 self._match(TokenType.ALIAS) 5377 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5378 return this 5379 else: 5380 over = self._prev.text.upper() 5381 5382 if comments: 5383 func.comments = None # type: ignore 5384 5385 if not self._match(TokenType.L_PAREN): 5386 return self.expression( 5387 exp.Window, 5388 comments=comments, 5389 this=this, 5390 alias=self._parse_id_var(False), 5391 over=over, 5392 ) 5393 5394 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5395 5396 first = self._match(TokenType.FIRST) 5397 if self._match_text_seq("LAST"): 5398 first = False 5399 5400 partition, order = self._parse_partition_and_order() 5401 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5402 5403 if kind: 5404 self._match(TokenType.BETWEEN) 5405 start = self._parse_window_spec() 5406 self._match(TokenType.AND) 5407 end = self._parse_window_spec() 5408 5409 spec = self.expression( 5410 exp.WindowSpec, 5411 kind=kind, 5412 start=start["value"], 5413 start_side=start["side"], 5414 end=end["value"], 5415 end_side=end["side"], 5416 ) 5417 else: 5418 spec = None 5419 5420 self._match_r_paren() 5421 5422 window = self.expression( 5423 exp.Window, 5424 comments=comments, 5425 this=this, 5426 partition_by=partition, 5427 order=order, 5428 spec=spec, 5429 alias=window_alias, 5430 over=over, 5431 first=first, 5432 ) 5433 5434 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
5435 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5436 return self._parse_window(window, alias=alias) 5437 5438 return window 5439 5440 def _parse_partition_and_order( 5441 self, 5442 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5443 return self._parse_partition_by(), self._parse_order() 5444 5445 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5446 self._match(TokenType.BETWEEN) 5447 5448 return { 5449 "value": ( 5450 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5451 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5452 or self._parse_bitwise() 5453 ), 5454 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5455 } 5456 5457 def _parse_alias( 5458 self, this: t.Optional[exp.Expression], explicit: bool = False 5459 ) -> t.Optional[exp.Expression]: 5460 any_token = self._match(TokenType.ALIAS) 5461 comments = self._prev_comments 5462 5463 if explicit and not any_token: 5464 return this 5465 5466 if self._match(TokenType.L_PAREN): 5467 aliases = self.expression( 5468 exp.Aliases, 5469 comments=comments, 5470 this=this, 5471 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5472 ) 5473 self._match_r_paren(aliases) 5474 return aliases 5475 5476 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5477 self.STRING_ALIASES and self._parse_string_as_identifier() 5478 ) 5479 5480 if alias: 5481 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5482 column = this.this 5483 5484 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5485 if not this.comments and column and column.comments: 5486 this.comments = column.comments 5487 column.comments = None 5488 5489 return this 5490 5491 def _parse_id_var( 5492 self, 5493 any_token: bool = True, 5494 tokens: t.Optional[t.Collection[TokenType]] = None, 5495 ) -> t.Optional[exp.Expression]: 5496 identifier = self._parse_identifier() 5497 5498 if 
identifier: 5499 return identifier 5500 5501 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5502 quoted = self._prev.token_type == TokenType.STRING 5503 return exp.Identifier(this=self._prev.text, quoted=quoted) 5504 5505 return None 5506 5507 def _parse_string(self) -> t.Optional[exp.Expression]: 5508 if self._match_set(self.STRING_PARSERS): 5509 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5510 return self._parse_placeholder() 5511 5512 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5513 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5514 5515 def _parse_number(self) -> t.Optional[exp.Expression]: 5516 if self._match_set(self.NUMERIC_PARSERS): 5517 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5518 return self._parse_placeholder() 5519 5520 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5521 if self._match(TokenType.IDENTIFIER): 5522 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5523 return self._parse_placeholder() 5524 5525 def _parse_var( 5526 self, 5527 any_token: bool = False, 5528 tokens: t.Optional[t.Collection[TokenType]] = None, 5529 upper: bool = False, 5530 ) -> t.Optional[exp.Expression]: 5531 if ( 5532 (any_token and self._advance_any()) 5533 or self._match(TokenType.VAR) 5534 or (self._match_set(tokens) if tokens else False) 5535 ): 5536 return self.expression( 5537 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5538 ) 5539 return self._parse_placeholder() 5540 5541 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5542 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5543 self._advance() 5544 return self._prev 5545 return None 5546 5547 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5548 return self._parse_var() or self._parse_string() 5549 5550 def 
_parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5551 return self._parse_primary() or self._parse_var(any_token=True) 5552 5553 def _parse_null(self) -> t.Optional[exp.Expression]: 5554 if self._match_set(self.NULL_TOKENS): 5555 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5556 return self._parse_placeholder() 5557 5558 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5559 if self._match(TokenType.TRUE): 5560 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5561 if self._match(TokenType.FALSE): 5562 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5563 return self._parse_placeholder() 5564 5565 def _parse_star(self) -> t.Optional[exp.Expression]: 5566 if self._match(TokenType.STAR): 5567 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5568 return self._parse_placeholder() 5569 5570 def _parse_parameter(self) -> exp.Parameter: 5571 self._match(TokenType.L_BRACE) 5572 this = self._parse_identifier() or self._parse_primary_or_var() 5573 expression = self._match(TokenType.COLON) and ( 5574 self._parse_identifier() or self._parse_primary_or_var() 5575 ) 5576 self._match(TokenType.R_BRACE) 5577 return self.expression(exp.Parameter, this=this, expression=expression) 5578 5579 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5580 if self._match_set(self.PLACEHOLDER_PARSERS): 5581 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5582 if placeholder: 5583 return placeholder 5584 self._advance(-1) 5585 return None 5586 5587 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5588 if not self._match(TokenType.EXCEPT): 5589 return None 5590 if self._match(TokenType.L_PAREN, advance=False): 5591 return self._parse_wrapped_csv(self._parse_column) 5592 5593 except_column = self._parse_column() 5594 return [except_column] if except_column else None 5595 5596 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5597 if not self._match(TokenType.REPLACE): 
5598 return None 5599 if self._match(TokenType.L_PAREN, advance=False): 5600 return self._parse_wrapped_csv(self._parse_expression) 5601 5602 replace_expression = self._parse_expression() 5603 return [replace_expression] if replace_expression else None 5604 5605 def _parse_csv( 5606 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5607 ) -> t.List[exp.Expression]: 5608 parse_result = parse_method() 5609 items = [parse_result] if parse_result is not None else [] 5610 5611 while self._match(sep): 5612 self._add_comments(parse_result) 5613 parse_result = parse_method() 5614 if parse_result is not None: 5615 items.append(parse_result) 5616 5617 return items 5618 5619 def _parse_tokens( 5620 self, parse_method: t.Callable, expressions: t.Dict 5621 ) -> t.Optional[exp.Expression]: 5622 this = parse_method() 5623 5624 while self._match_set(expressions): 5625 this = self.expression( 5626 expressions[self._prev.token_type], 5627 this=this, 5628 comments=self._prev_comments, 5629 expression=parse_method(), 5630 ) 5631 5632 return this 5633 5634 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5635 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5636 5637 def _parse_wrapped_csv( 5638 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5639 ) -> t.List[exp.Expression]: 5640 return self._parse_wrapped( 5641 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5642 ) 5643 5644 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5645 wrapped = self._match(TokenType.L_PAREN) 5646 if not wrapped and not optional: 5647 self.raise_error("Expecting (") 5648 parse_result = parse_method() 5649 if wrapped: 5650 self._match_r_paren() 5651 return parse_result 5652 5653 def _parse_expressions(self) -> t.List[exp.Expression]: 5654 return self._parse_csv(self._parse_expression) 5655 5656 def _parse_select_or_expression(self, alias: bool = 
False) -> t.Optional[exp.Expression]:
    return self._parse_select() or self._parse_set_operations(
        self._parse_expression() if alias else self._parse_conjunction()
    )

def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
    # SELECT used inside DDL (e.g. CREATE TABLE ... AS): parsed nested and without a
    # trailing subquery alias, then extended with set operations and query modifiers.
    return self._parse_query_modifiers(
        self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
    )

def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Parse a transaction-start statement, e.g. BEGIN [kind] TRANSACTION mode, ..."""
    this = None
    if self._match_texts(self.TRANSACTION_KIND):
        this = self._prev.text

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    while True:
        mode = []
        # A single transaction mode may span several VAR tokens (e.g. "READ ONLY"),
        # which are re-joined into one space-separated string.
        while self._match(TokenType.VAR):
            mode.append(self._prev.text)

        if mode:
            modes.append(" ".join(mode))
        if not self._match(TokenType.COMMA):
            break

    return self.expression(exp.Transaction, this=this, modes=modes)

def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """Parse COMMIT/ROLLBACK, with optional TO [SAVEPOINT] name and AND [NO] CHAIN."""
    chain = None
    savepoint = None
    # The COMMIT/ROLLBACK keyword itself was already consumed by the caller.
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    self._match_texts(("TRANSACTION", "WORK"))

    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    if self._match(TokenType.AND):
        chain = not self._match_text_seq("NO")
        self._match_text_seq("CHAIN")

    # Only the savepoint survives for ROLLBACK, only the chain flag for COMMIT.
    if is_rollback:
        return self.expression(exp.Rollback, savepoint=savepoint)

    return self.expression(exp.Commit, chain=chain)

def _parse_refresh(self) -> exp.Refresh:
    # REFRESH [TABLE] <string or table reference>
    self._match(TokenType.TABLE)
    return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

def _parse_add_column(self) -> t.Optional[exp.Expression]:
    """Parse ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER col]."""
    if not self._match_text_seq("ADD"):
        return None

    self._match(TokenType.COLUMN)
    exists_column = self._parse_exists(not_=True)
    expression = self._parse_field_def()

    if expression:
        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

    return expression

def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    drop = self._match(TokenType.DROP) and self._parse_drop()
    # Default the drop kind to COLUMN when the nested parser didn't set one.
    if drop and not isinstance(drop, exp.Command):
        drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop

# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )

def _parse_alter_table_add(self) -> t.List[exp.Expression]:
    """Parse the ADD ... action of ALTER TABLE (constraints or columns)."""
    # Remember the position of the ADD keyword so we can rewind for the column path.
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
        return self._parse_csv(
            lambda: self.expression(
                exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
            )
        )

    self._retreat(index)
    if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
        return self._parse_wrapped_csv(self._parse_field_def, optional=True)
    return self._parse_wrapped_csv(self._parse_add_column, optional=True)

def _parse_alter_table_alter(self) -> exp.AlterColumn:
    """Parse ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr | COMMENT str | [SET DATA] TYPE ...}."""
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
    if self._match(TokenType.COMMENT):
        return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

    # Fallback: a type change, with optional SET DATA / TYPE keywords.
    self._match_text_seq("SET", "DATA")
    self._match_text_seq("TYPE")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_conjunction(),
    )

def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
    """Parse the DROP ... action of ALTER TABLE (partitions or columns)."""
    index = self._index - 1

    partition_exists = self._parse_exists()
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)

def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
    """Parse RENAME [COLUMN old TO new | TO new_table]."""
    if self._match(TokenType.COLUMN):
        exists = self._parse_exists()
        old_column = self._parse_column()
        to = self._match_text_seq("TO")
        new_column = self._parse_column()

        if old_column is None or to is None or new_column is None:
            return None

        return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

    self._match_text_seq("TO")
    return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

def _parse_alter(self) -> exp.AlterTable | exp.Command:
    """Parse ALTER TABLE; anything unrecognized degrades to a raw exp.Command."""
    start = self._prev

    if not self._match(TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")
    this = self._parse_table(schema=True)

    if self._next:
        self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))
        options = self._parse_csv(self._parse_property)

        # Only produce an AlterTable if every token was consumed; otherwise fall back.
        if not self._curr and actions:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
                only=only,
                options=options,
            )

    return self._parse_as_command(start)

def _parse_merge(self) -> exp.Merge:
    """Parse MERGE INTO target [alias] USING source ON condition WHEN ... clauses."""
    self._match(TokenType.INTO)
    target = self._parse_table()

    if target and self._match(TokenType.ALIAS, advance=False):
        target.set("alias", self._parse_table_alias())

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_conjunction()

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=self._parse_when_matched(),
    )

def _parse_when_matched(self) -> t.List[exp.When]:
    """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ... arms of MERGE."""
    whens = []

    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # source is False for BY TARGET, True for BY SOURCE, None/False-ish otherwise.
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_conjunction() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            _this = self._parse_star()
            if _this:
                # INSERT * shorthand.
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=self._parse_value(),
                    expression=self._match_text_seq("VALUES") and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                # UPDATE * shorthand.
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = None

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )
    return whens

def _parse_show(self) -> t.Optional[exp.Expression]:
    # Dialect-specific SHOW parsers win; otherwise keep the statement as a raw Command.
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    return self._parse_as_command(self._prev)

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """Parse one SET item of the form `left = right` (or `left TO right`)."""
    index = self._index

    if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_id_var()
    assignment_delimiter = self._match_texts(("=", "TO"))

    if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    this = self.expression(exp.EQ, this=left, expression=right)

    return self.expression(exp.SetItem, this=this, kind=kind)

def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        # "global" is a Python keyword, so it must be passed via **kwargs.
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """Parse a SET statement; falls back to a raw Command on leftover tokens."""
    index = self._index
    set_ = self.expression(
        exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
    )

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(
    self, options: OPTIONS_TYPE, raise_unmatched: bool = True
) -> t.Optional[exp.Var]:
    """Match a (possibly multi-keyword) option from `options` and return it as a Var.

    Args:
        options: Mapping of leading keyword -> allowed keyword continuations.
        raise_unmatched: Whether to raise when the leading keyword has required
            continuations but none of them matched.
    """
    start = self._curr
    if not start:
        return None

    option = start.text.upper()
    continuations = options.get(option)

    index = self._index
    self._advance()
    for keywords in continuations or []:
        if isinstance(keywords, str):
            keywords = (keywords,)

        if self._match_text_seq(*keywords):
            option = f"{option} {' '.join(keywords)}"
            break
    else:
        # No continuation matched. Fail both when continuations were required and
        # when the leading keyword itself wasn't a known option (continuations is None).
        if continuations or continuations is None:
            if raise_unmatched:
                self.raise_error(f"Unknown option {option}")

            self._retreat(index)
            return None

    return exp.var(option)

def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume all remaining tokens and wrap the raw SQL text in an exp.Command."""
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    # Split the leading keyword off from the rest of the statement text.
    size = len(start.text)
    self._warn_unsupported()
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parse a dictionary property: (kind [(key value, ...)]). Used by e.g. ClickHouse DDL."""
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()

            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )

def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse a dictionary range: (MIN x MAX y) or (MAX y), where MIN defaults to 0."""
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    if has_min:
        min = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max = self._parse_var() or self._parse_primary()
    else:
        max = self._parse_var() or self._parse_primary()
        min = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min, max=max)

def _parse_comprehension(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Comprehension]:
    """Parse `expr IN iterator [IF condition]` (array-comprehension syntax)."""
    index = self._index
    expression = self._parse_column()
    if not self._match(TokenType.IN):
        # index - 1 also gives back the token consumed before this method was entered.
        self._retreat(index - 1)
        return None
    iterator = self._parse_column()
    condition = self._parse_conjunction() if self._match_text_seq("IF") else None
    return self.expression(
        exp.Comprehension,
        this=this,
        expression=expression,
        iterator=iterator,
        condition=condition,
    )

def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
    """Parse a dollar-quoted heredoc string: $$...$$ or $tag$...$tag$."""
    if self._match(TokenType.HEREDOC_STRING):
        return self.expression(exp.Heredoc, this=self._prev.text)

    if not self._match_text_seq("$"):
        return None

    tags = ["$"]
    tag_text = None

    # The tag tokens must be adjacent (no whitespace) to form $tag$.
    if self._is_connected():
        self._advance()
        tags.append(self._prev.text.upper())
    else:
        self.raise_error("No closing $ found")

    if tags[-1] != "$":
        if self._is_connected() and self._match_text_seq("$"):
            tag_text = tags[-1]
            tags.append("$")
        else:
            self.raise_error("No closing $ found")

    heredoc_start = self._curr

    # Scan forward until the full closing tag sequence is found.
    while self._curr:
        if self._match_text_seq(*tags, advance=False):
            this = self._find_sql(heredoc_start, self._prev)
            self._advance(len(tags))
            return self.expression(exp.Heredoc, this=this, tag=tag_text)

        self._advance()

    self.raise_error(f"No closing {''.join(tags)} found")
    return None

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Walk `trie` with upcoming tokens and return the matching parser, if any.

    Rewinds the token cursor on failure.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    while True:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            subparser = parsers[" ".join(this)]
            return subparser

    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    # Returns True and (optionally) advances when the current token matches;
    # also moves any pending comments onto `expression`.
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None

def _match_set(self, types, advance=True):
    # Like _match, but accepts any token type in the given collection.
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None

def _match_pair(self, token_type_a, token_type_b, advance=True):
    # Matches two consecutive token types; advances past both when requested.
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    # Case-insensitive match of the current token's text against a collection.
    if self._curr and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return None

def _match_text_seq(self, *texts, advance=True):
    # Case-insensitive match of a sequence of token texts; rewinds fully on any miss.
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return None

    if not advance:
        self._retreat(index)

    return True

def _replace_lambda(
    self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
    """Rewrite Column references to lambda parameters into plain identifiers/dots."""
    if not node:
        return node

    for column in node.find_all(exp.Column):
        if column.parts[0].name in lambda_variables:
            dot_or_id = column.to_dot() if column.table else column.this
            parent = column.parent

            # Replace at the outermost enclosing Dot so e.g. x.y.z is rewritten once.
            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                # Loop didn't break: the column was not inside a Dot chain.
                if column is node:
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node

def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
    """Parse TRUNCATE [DATABASE|TABLE] targets with its optional clauses."""
    start = self._prev

    # Not to be confused with TRUNCATE(number, decimals) function call
    if self._match(TokenType.L_PAREN):
        self._retreat(self._index - 2)
        return self._parse_function()

    # Clickhouse supports TRUNCATE DATABASE as well
    is_database = self._match(TokenType.DATABASE)

    self._match(TokenType.TABLE)

    exists = self._parse_exists(not_=False)

    expressions = self._parse_csv(
        lambda: self._parse_table(schema=True, is_db_reference=is_database)
    )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match_text_seq("RESTART", "IDENTITY"):
        identity = "RESTART"
    elif self._match_text_seq("CONTINUE", "IDENTITY"):
        identity = "CONTINUE"
    else:
        identity = None

    if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
        option = self._prev.text
    else:
        option = None

    partition = self._parse_partition()

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.TruncateTable,
        expressions=expressions,
        is_database=is_database,
        exists=exists,
        cluster=cluster,
        identity=identity,
        option=option,
        partition=partition,
    )

def _parse_with_operator(self) -> t.Optional[exp.Expression]:
    """Parse `ordered_expr [WITH operator]` (e.g. index operator classes)."""
    this = self._parse_ordered(self._parse_opclass)

    if not self._match(TokenType.WITH):
        return this

    op = self._parse_var(any_token=True)

    return self.expression(exp.WithOperator, this=this, op=op)
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """Configure a new parser.

    Args:
        error_level: How parse errors are surfaced; defaults to ErrorLevel.IMMEDIATE.
        error_message_context: Number of characters of query context shown in error messages.
        max_errors: Cap on messages included in a raised ParseError (ErrorLevel.RAISE only).
        dialect: The SQL dialect (name or instance) this parser targets.
    """
    # Imported locally to avoid a circular import at module load time.
    from sqlglot.dialects import Dialect

    self.dialect = Dialect.get_or_raise(dialect)
    self.error_level = ErrorLevel.IMMEDIATE if error_level is None else error_level
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parse a token stream into syntax trees, one tree per SQL statement.

    Args:
        raw_tokens: The tokens to parse.
        sql: The original SQL text, used only to produce helpful debug messages.

    Returns:
        A list containing one parsed syntax tree per statement.
    """
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: If no parser is registered for a requested expression type.
        ParseError: If the tokens can't be parsed into any of the requested types.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # Bug fix: when expression_types is empty, `errors` is empty too and the old
    # `from errors[-1]` raised IndexError instead of the intended ParseError.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
def check_errors(self) -> None:
    """Surface accumulated parse errors according to the configured error level."""
    if self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

    # WARN and RAISE are distinct levels, so the branch order is irrelevant.
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    # Anchor the error on the given token, falling back to the current/previous
    # token, or an empty token when the parser has nothing to point at.
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        # \033[4m ... \033[0m underlines the offending span in terminals.
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)

    # Idiom fix: the original used a conditional *expression* as a statement purely
    # for its side effects; an explicit if/else makes the intent clear.
    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)

    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    # Guard clause: skip validation entirely when errors are ignored.
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for error_message in expression.error_messages(args):
        self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.