sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 return lambda self, this: self._parse_escape( 47 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 48 ) 49 50 51def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 62 63 64def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _builder(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _builder 75 76 77class _Parser(type): 78 def __new__(cls, clsname, bases, attrs): 79 klass = super().__new__(cls, clsname, bases, attrs) 80 81 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 82 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 83 84 return klass 85 86 87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: The amount of context to capture from a query string when displaying 95 the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 
99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": build_like, 123 "LOG": build_logarithm, 124 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 125 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 126 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 127 "TIME_TO_TIME_STR": lambda args: exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 132 this=exp.Cast( 133 this=seq_get(args, 0), 134 to=exp.DataType(this=exp.DataType.Type.TEXT), 135 ), 136 start=exp.Literal.number(1), 137 length=exp.Literal.number(10), 138 ), 139 "VAR_MAP": build_var_map, 140 } 141 142 NO_PAREN_FUNCTIONS = { 143 TokenType.CURRENT_DATE: exp.CurrentDate, 144 TokenType.CURRENT_DATETIME: exp.CurrentDate, 145 TokenType.CURRENT_TIME: exp.CurrentTime, 146 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 147 TokenType.CURRENT_USER: exp.CurrentUser, 148 } 149 150 STRUCT_TYPE_TOKENS = { 151 TokenType.NESTED, 152 TokenType.OBJECT, 153 TokenType.STRUCT, 154 } 155 156 NESTED_TYPE_TOKENS = { 157 TokenType.ARRAY, 158 TokenType.LOWCARDINALITY, 159 TokenType.MAP, 160 TokenType.NULLABLE, 161 *STRUCT_TYPE_TOKENS, 162 } 163 164 ENUM_TYPE_TOKENS = { 165 TokenType.ENUM, 166 TokenType.ENUM8, 167 TokenType.ENUM16, 168 } 169 170 AGGREGATE_TYPE_TOKENS = { 171 TokenType.AGGREGATEFUNCTION, 172 TokenType.SIMPLEAGGREGATEFUNCTION, 173 } 174 175 TYPE_TOKENS = { 176 TokenType.BIT, 177 TokenType.BOOLEAN, 178 TokenType.TINYINT, 179 TokenType.UTINYINT, 180 TokenType.SMALLINT, 181 TokenType.USMALLINT, 182 TokenType.INT, 183 TokenType.UINT, 184 TokenType.BIGINT, 185 TokenType.UBIGINT, 186 TokenType.INT128, 187 TokenType.UINT128, 188 TokenType.INT256, 189 TokenType.UINT256, 190 TokenType.MEDIUMINT, 191 TokenType.UMEDIUMINT, 192 TokenType.FIXEDSTRING, 193 TokenType.FLOAT, 194 TokenType.DOUBLE, 195 TokenType.CHAR, 196 TokenType.NCHAR, 197 TokenType.VARCHAR, 198 TokenType.NVARCHAR, 199 TokenType.BPCHAR, 200 TokenType.TEXT, 201 TokenType.MEDIUMTEXT, 202 TokenType.LONGTEXT, 203 TokenType.MEDIUMBLOB, 204 TokenType.LONGBLOB, 205 TokenType.BINARY, 206 TokenType.VARBINARY, 207 TokenType.JSON, 208 TokenType.JSONB, 209 TokenType.INTERVAL, 210 TokenType.TINYBLOB, 211 TokenType.TINYTEXT, 212 TokenType.TIME, 213 TokenType.TIMETZ, 214 TokenType.TIMESTAMP, 215 TokenType.TIMESTAMP_S, 216 TokenType.TIMESTAMP_MS, 217 TokenType.TIMESTAMP_NS, 218 TokenType.TIMESTAMPTZ, 219 TokenType.TIMESTAMPLTZ, 220 TokenType.DATETIME, 221 TokenType.DATETIME64, 222 TokenType.DATE, 223 TokenType.DATE32, 224 TokenType.INT4RANGE, 
225 TokenType.INT4MULTIRANGE, 226 TokenType.INT8RANGE, 227 TokenType.INT8MULTIRANGE, 228 TokenType.NUMRANGE, 229 TokenType.NUMMULTIRANGE, 230 TokenType.TSRANGE, 231 TokenType.TSMULTIRANGE, 232 TokenType.TSTZRANGE, 233 TokenType.TSTZMULTIRANGE, 234 TokenType.DATERANGE, 235 TokenType.DATEMULTIRANGE, 236 TokenType.DECIMAL, 237 TokenType.UDECIMAL, 238 TokenType.BIGDECIMAL, 239 TokenType.UUID, 240 TokenType.GEOGRAPHY, 241 TokenType.GEOMETRY, 242 TokenType.HLLSKETCH, 243 TokenType.HSTORE, 244 TokenType.PSEUDO_TYPE, 245 TokenType.SUPER, 246 TokenType.SERIAL, 247 TokenType.SMALLSERIAL, 248 TokenType.BIGSERIAL, 249 TokenType.XML, 250 TokenType.YEAR, 251 TokenType.UNIQUEIDENTIFIER, 252 TokenType.USERDEFINED, 253 TokenType.MONEY, 254 TokenType.SMALLMONEY, 255 TokenType.ROWVERSION, 256 TokenType.IMAGE, 257 TokenType.VARIANT, 258 TokenType.OBJECT, 259 TokenType.OBJECT_IDENTIFIER, 260 TokenType.INET, 261 TokenType.IPADDRESS, 262 TokenType.IPPREFIX, 263 TokenType.IPV4, 264 TokenType.IPV6, 265 TokenType.UNKNOWN, 266 TokenType.NULL, 267 TokenType.NAME, 268 *ENUM_TYPE_TOKENS, 269 *NESTED_TYPE_TOKENS, 270 *AGGREGATE_TYPE_TOKENS, 271 } 272 273 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 274 TokenType.BIGINT: TokenType.UBIGINT, 275 TokenType.INT: TokenType.UINT, 276 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 277 TokenType.SMALLINT: TokenType.USMALLINT, 278 TokenType.TINYINT: TokenType.UTINYINT, 279 TokenType.DECIMAL: TokenType.UDECIMAL, 280 } 281 282 SUBQUERY_PREDICATES = { 283 TokenType.ANY: exp.Any, 284 TokenType.ALL: exp.All, 285 TokenType.EXISTS: exp.Exists, 286 TokenType.SOME: exp.Any, 287 } 288 289 RESERVED_TOKENS = { 290 *Tokenizer.SINGLE_TOKENS.values(), 291 TokenType.SELECT, 292 } - {TokenType.IDENTIFIER} 293 294 DB_CREATABLES = { 295 TokenType.DATABASE, 296 TokenType.SCHEMA, 297 TokenType.TABLE, 298 TokenType.VIEW, 299 TokenType.MODEL, 300 TokenType.DICTIONARY, 301 TokenType.SEQUENCE, 302 TokenType.STORAGE_INTEGRATION, 303 } 304 305 CREATABLES = { 306 TokenType.COLUMN, 307 TokenType.CONSTRAINT, 308 TokenType.FUNCTION, 309 TokenType.INDEX, 310 TokenType.PROCEDURE, 311 TokenType.FOREIGN_KEY, 312 *DB_CREATABLES, 313 } 314 315 # Tokens that can represent identifiers 316 ID_VAR_TOKENS = { 317 TokenType.VAR, 318 TokenType.ANTI, 319 TokenType.APPLY, 320 TokenType.ASC, 321 TokenType.ASOF, 322 TokenType.AUTO_INCREMENT, 323 TokenType.BEGIN, 324 TokenType.BPCHAR, 325 TokenType.CACHE, 326 TokenType.CASE, 327 TokenType.COLLATE, 328 TokenType.COMMAND, 329 TokenType.COMMENT, 330 TokenType.COMMIT, 331 TokenType.CONSTRAINT, 332 TokenType.DEFAULT, 333 TokenType.DELETE, 334 TokenType.DESC, 335 TokenType.DESCRIBE, 336 TokenType.DICTIONARY, 337 TokenType.DIV, 338 TokenType.END, 339 TokenType.EXECUTE, 340 TokenType.ESCAPE, 341 TokenType.FALSE, 342 TokenType.FIRST, 343 TokenType.FILTER, 344 TokenType.FINAL, 345 TokenType.FORMAT, 346 TokenType.FULL, 347 TokenType.IDENTIFIER, 348 TokenType.IS, 349 TokenType.ISNULL, 350 TokenType.INTERVAL, 351 TokenType.KEEP, 352 TokenType.KILL, 353 TokenType.LEFT, 354 TokenType.LOAD, 355 TokenType.MERGE, 356 TokenType.NATURAL, 357 TokenType.NEXT, 358 TokenType.OFFSET, 359 TokenType.OPERATOR, 360 TokenType.ORDINALITY, 361 TokenType.OVERLAPS, 362 TokenType.OVERWRITE, 363 TokenType.PARTITION, 364 TokenType.PERCENT, 365 TokenType.PIVOT, 366 TokenType.PRAGMA, 367 TokenType.RANGE, 368 TokenType.RECURSIVE, 369 TokenType.REFERENCES, 370 TokenType.REFRESH, 371 TokenType.REPLACE, 372 TokenType.RIGHT, 373 TokenType.ROW, 374 TokenType.ROWS, 375 TokenType.SEMI, 376 TokenType.SET, 377 TokenType.SETTINGS, 378 
TokenType.SHOW, 379 TokenType.TEMPORARY, 380 TokenType.TOP, 381 TokenType.TRUE, 382 TokenType.TRUNCATE, 383 TokenType.UNIQUE, 384 TokenType.UNPIVOT, 385 TokenType.UPDATE, 386 TokenType.USE, 387 TokenType.VOLATILE, 388 TokenType.WINDOW, 389 *CREATABLES, 390 *SUBQUERY_PREDICATES, 391 *TYPE_TOKENS, 392 *NO_PAREN_FUNCTIONS, 393 } 394 395 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 396 397 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 398 TokenType.ANTI, 399 TokenType.APPLY, 400 TokenType.ASOF, 401 TokenType.FULL, 402 TokenType.LEFT, 403 TokenType.LOCK, 404 TokenType.NATURAL, 405 TokenType.OFFSET, 406 TokenType.RIGHT, 407 TokenType.SEMI, 408 TokenType.WINDOW, 409 } 410 411 ALIAS_TOKENS = ID_VAR_TOKENS 412 413 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 414 415 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 416 417 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 418 419 FUNC_TOKENS = { 420 TokenType.COLLATE, 421 TokenType.COMMAND, 422 TokenType.CURRENT_DATE, 423 TokenType.CURRENT_DATETIME, 424 TokenType.CURRENT_TIMESTAMP, 425 TokenType.CURRENT_TIME, 426 TokenType.CURRENT_USER, 427 TokenType.FILTER, 428 TokenType.FIRST, 429 TokenType.FORMAT, 430 TokenType.GLOB, 431 TokenType.IDENTIFIER, 432 TokenType.INDEX, 433 TokenType.ISNULL, 434 TokenType.ILIKE, 435 TokenType.INSERT, 436 TokenType.LIKE, 437 TokenType.MERGE, 438 TokenType.OFFSET, 439 TokenType.PRIMARY_KEY, 440 TokenType.RANGE, 441 TokenType.REPLACE, 442 TokenType.RLIKE, 443 TokenType.ROW, 444 TokenType.UNNEST, 445 TokenType.VAR, 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.SEQUENCE, 449 TokenType.DATE, 450 TokenType.DATETIME, 451 TokenType.TABLE, 452 TokenType.TIMESTAMP, 453 TokenType.TIMESTAMPTZ, 454 TokenType.TRUNCATE, 455 TokenType.WINDOW, 456 TokenType.XOR, 457 *TYPE_TOKENS, 458 *SUBQUERY_PREDICATES, 459 } 460 461 CONJUNCTION = { 462 TokenType.AND: exp.And, 463 TokenType.OR: exp.Or, 464 } 465 466 EQUALITY = { 467 TokenType.COLON_EQ: exp.PropertyEQ, 468 TokenType.EQ: exp.EQ, 469 TokenType.NEQ: exp.NEQ, 470 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 471 } 472 473 COMPARISON = { 474 TokenType.GT: exp.GT, 475 TokenType.GTE: exp.GTE, 476 TokenType.LT: exp.LT, 477 TokenType.LTE: exp.LTE, 478 } 479 480 BITWISE = { 481 TokenType.AMP: exp.BitwiseAnd, 482 TokenType.CARET: exp.BitwiseXor, 483 TokenType.PIPE: exp.BitwiseOr, 484 } 485 486 TERM = { 487 TokenType.DASH: exp.Sub, 488 TokenType.PLUS: exp.Add, 489 TokenType.MOD: exp.Mod, 490 TokenType.COLLATE: exp.Collate, 491 } 492 493 FACTOR = { 494 TokenType.DIV: exp.IntDiv, 495 TokenType.LR_ARROW: exp.Distance, 496 TokenType.SLASH: exp.Div, 497 TokenType.STAR: exp.Mul, 498 } 499 500 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 501 502 TIMES = { 503 TokenType.TIME, 504 TokenType.TIMETZ, 505 } 506 507 TIMESTAMPS = { 508 TokenType.TIMESTAMP, 509 TokenType.TIMESTAMPTZ, 510 TokenType.TIMESTAMPLTZ, 511 *TIMES, 512 } 513 514 SET_OPERATIONS = { 515 TokenType.UNION, 516 TokenType.INTERSECT, 517 TokenType.EXCEPT, 518 } 519 520 JOIN_METHODS = { 521 TokenType.ASOF, 522 TokenType.NATURAL, 523 TokenType.POSITIONAL, 524 } 525 526 JOIN_SIDES = { 527 TokenType.LEFT, 528 TokenType.RIGHT, 529 TokenType.FULL, 530 } 531 532 JOIN_KINDS = { 533 TokenType.INNER, 534 TokenType.OUTER, 535 TokenType.CROSS, 536 TokenType.SEMI, 537 TokenType.ANTI, 538 } 539 540 JOIN_HINTS: t.Set[str] = set() 541 542 LAMBDAS = { 543 TokenType.ARROW: lambda self, expressions: self.expression( 544 exp.Lambda, 545 this=self._replace_lambda( 546 self._parse_conjunction(), 547 {node.name for node in 
expressions}, 548 ), 549 expressions=expressions, 550 ), 551 TokenType.FARROW: lambda self, expressions: self.expression( 552 exp.Kwarg, 553 this=exp.var(expressions[0].name), 554 expression=self._parse_conjunction(), 555 ), 556 } 557 558 COLUMN_OPERATORS = { 559 TokenType.DOT: None, 560 TokenType.DCOLON: lambda self, this, to: self.expression( 561 exp.Cast if self.STRICT_CAST else exp.TryCast, 562 this=this, 563 to=to, 564 ), 565 TokenType.ARROW: lambda self, this, path: self.expression( 566 exp.JSONExtract, 567 this=this, 568 expression=self.dialect.to_json_path(path), 569 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 570 ), 571 TokenType.DARROW: lambda self, this, path: self.expression( 572 exp.JSONExtractScalar, 573 this=this, 574 expression=self.dialect.to_json_path(path), 575 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 576 ), 577 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtract, 579 this=this, 580 expression=path, 581 ), 582 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 583 exp.JSONBExtractScalar, 584 this=this, 585 expression=path, 586 ), 587 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 588 exp.JSONBContains, 589 this=this, 590 expression=key, 591 ), 592 } 593 594 EXPRESSION_PARSERS = { 595 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 596 exp.Column: lambda self: self._parse_column(), 597 exp.Condition: lambda self: self._parse_conjunction(), 598 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 599 exp.Expression: lambda self: self._parse_expression(), 600 exp.From: lambda self: self._parse_from(), 601 exp.Group: lambda self: self._parse_group(), 602 exp.Having: lambda self: self._parse_having(), 603 exp.Identifier: lambda self: self._parse_id_var(), 604 exp.Join: lambda self: self._parse_join(), 605 exp.Lambda: lambda self: self._parse_lambda(), 606 exp.Lateral: lambda self: self._parse_lateral(), 607 exp.Limit: lambda self: self._parse_limit(), 608 exp.Offset: lambda self: self._parse_offset(), 609 exp.Order: lambda self: self._parse_order(), 610 exp.Ordered: lambda self: self._parse_ordered(), 611 exp.Properties: lambda self: self._parse_properties(), 612 exp.Qualify: lambda self: self._parse_qualify(), 613 exp.Returning: lambda self: self._parse_returning(), 614 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 615 exp.Table: lambda self: self._parse_table_parts(), 616 exp.TableAlias: lambda self: self._parse_table_alias(), 617 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 618 exp.Where: lambda self: self._parse_where(), 619 exp.Window: lambda self: self._parse_named_window(), 620 exp.With: lambda self: self._parse_with(), 621 "JOIN_TYPE": lambda self: self._parse_join_parts(), 622 } 623 624 STATEMENT_PARSERS = { 625 TokenType.ALTER: lambda self: self._parse_alter(), 626 TokenType.BEGIN: lambda self: self._parse_transaction(), 627 TokenType.CACHE: lambda self: self._parse_cache(), 628 TokenType.COMMENT: lambda self: self._parse_comment(), 629 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 630 TokenType.CREATE: lambda self: self._parse_create(), 631 TokenType.DELETE: lambda self: self._parse_delete(), 632 TokenType.DESC: lambda self: self._parse_describe(), 633 TokenType.DESCRIBE: lambda self: self._parse_describe(), 634 TokenType.DROP: lambda self: self._parse_drop(), 635 TokenType.INSERT: lambda self: self._parse_insert(), 636 TokenType.KILL: lambda self: self._parse_kill(), 637 
TokenType.LOAD: lambda self: self._parse_load(), 638 TokenType.MERGE: lambda self: self._parse_merge(), 639 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 640 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 641 TokenType.REFRESH: lambda self: self._parse_refresh(), 642 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 643 TokenType.SET: lambda self: self._parse_set(), 644 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 645 TokenType.UNCACHE: lambda self: self._parse_uncache(), 646 TokenType.UPDATE: lambda self: self._parse_update(), 647 TokenType.USE: lambda self: self.expression( 648 exp.Use, 649 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 650 this=self._parse_table(schema=False), 651 ), 652 } 653 654 UNARY_PARSERS = { 655 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 656 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 657 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 658 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 659 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 660 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 661 } 662 663 STRING_PARSERS = { 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.STRING: lambda self, token: self.expression( 672 exp.Literal, this=token.text, is_string=True 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 } 680 681 NUMERIC_PARSERS = { 682 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 683 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 684 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 685 TokenType.NUMBER: lambda self, token: self.expression( 686 exp.Literal, this=token.text, is_string=False 687 ), 688 } 689 690 PRIMARY_PARSERS = { 691 **STRING_PARSERS, 692 **NUMERIC_PARSERS, 693 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 694 TokenType.NULL: lambda self, _: self.expression(exp.Null), 695 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 696 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 697 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 698 TokenType.STAR: lambda self, _: self.expression( 699 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 700 ), 701 } 702 703 PLACEHOLDER_PARSERS = { 704 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 705 TokenType.PARAMETER: lambda self: self._parse_parameter(), 706 TokenType.COLON: lambda self: ( 707 self.expression(exp.Placeholder, this=self._prev.text) 708 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 709 else None 710 ), 711 } 712 713 RANGE_PARSERS = { 714 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 715 
TokenType.GLOB: binary_range_parser(exp.Glob), 716 TokenType.ILIKE: binary_range_parser(exp.ILike), 717 TokenType.IN: lambda self, this: self._parse_in(this), 718 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 719 TokenType.IS: lambda self, this: self._parse_is(this), 720 TokenType.LIKE: binary_range_parser(exp.Like), 721 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 722 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 723 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 724 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 725 } 726 727 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 728 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 729 "AUTO": lambda self: self._parse_auto_property(), 730 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 731 "BACKUP": lambda self: self.expression( 732 exp.BackupProperty, this=self._parse_var(any_token=True) 733 ), 734 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 735 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHECKSUM": lambda self: self._parse_checksum(), 738 "CLUSTER BY": lambda self: self._parse_cluster(), 739 "CLUSTERED": lambda self: self._parse_clustered_by(), 740 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 741 exp.CollateProperty, **kwargs 742 ), 743 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 744 "CONTAINS": lambda self: self._parse_contains_property(), 745 "COPY": lambda self: self._parse_copy_property(), 746 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 747 "DEFINER": lambda self: self._parse_definer(), 748 "DETERMINISTIC": lambda self: self.expression( 749 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 750 ), 751 "DISTKEY": lambda self: self._parse_distkey(), 752 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 753 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 754 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 755 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 756 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 757 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 758 "FREESPACE": lambda self: self._parse_freespace(), 759 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 760 "HEAP": lambda self: self.expression(exp.HeapProperty), 761 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 762 "IMMUTABLE": lambda self: self.expression( 763 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 764 ), 765 "INHERITS": lambda self: self.expression( 766 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 767 ), 768 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 769 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 770 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 771 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 772 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 773 "LIKE": lambda self: self._parse_create_like(), 774 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 775 "LOCK": lambda self: self._parse_locking(), 776 "LOCKING": 
lambda self: self._parse_locking(), 777 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 778 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 779 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 780 "MODIFIES": lambda self: self._parse_modifies_property(), 781 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 782 "NO": lambda self: self._parse_no_property(), 783 "ON": lambda self: self._parse_on_property(), 784 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 785 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 786 "PARTITION": lambda self: self._parse_partitioned_of(), 787 "PARTITION BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 790 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 791 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 792 "READS": lambda self: self._parse_reads_property(), 793 "REMOTE": lambda self: self._parse_remote_with_connection(), 794 "RETURNS": lambda self: self._parse_returns(), 795 "ROW": lambda self: self._parse_row(), 796 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 797 "SAMPLE": lambda self: self.expression( 798 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 799 ), 800 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 801 "SETTINGS": lambda self: self.expression( 802 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 803 ), 804 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 805 "SORTKEY": lambda self: self._parse_sortkey(), 806 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 807 "STABLE": lambda self: self.expression( 808 exp.StabilityProperty, this=exp.Literal.string("STABLE") 809 ), 810 "STORED": lambda self: self._parse_stored(), 811 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 812 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 813 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 814 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 815 "TO": lambda self: self._parse_to_table(), 816 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 817 "TRANSFORM": lambda self: self.expression( 818 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 819 ), 820 "TTL": lambda self: self._parse_ttl(), 821 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 822 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 823 "VOLATILE": lambda self: self._parse_volatile_property(), 824 "WITH": lambda self: self._parse_with_property(), 825 } 826 827 CONSTRAINT_PARSERS = { 828 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 829 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 830 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 831 "CHARACTER SET": lambda self: self.expression( 832 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 833 ), 834 "CHECK": lambda self: self.expression( 835 exp.CheckColumnConstraint, 836 this=self._parse_wrapped(self._parse_conjunction), 837 enforced=self._match_text_seq("ENFORCED"), 838 ), 839 "COLLATE": lambda self: self.expression( 840 
exp.CollateColumnConstraint, this=self._parse_var() 841 ), 842 "COMMENT": lambda self: self.expression( 843 exp.CommentColumnConstraint, this=self._parse_string() 844 ), 845 "COMPRESS": lambda self: self._parse_compress(), 846 "CLUSTERED": lambda self: self.expression( 847 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 848 ), 849 "NONCLUSTERED": lambda self: self.expression( 850 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 851 ), 852 "DEFAULT": lambda self: self.expression( 853 exp.DefaultColumnConstraint, this=self._parse_bitwise() 854 ), 855 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 856 "EPHEMERAL": lambda self: self.expression( 857 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 858 ), 859 "EXCLUDE": lambda self: self.expression( 860 exp.ExcludeColumnConstraint, this=self._parse_index_params() 861 ), 862 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 863 "FORMAT": lambda self: self.expression( 864 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 865 ), 866 "GENERATED": lambda self: self._parse_generated_as_identity(), 867 "IDENTITY": lambda self: self._parse_auto_increment(), 868 "INLINE": lambda self: self._parse_inline(), 869 "LIKE": lambda self: self._parse_create_like(), 870 "NOT": lambda self: self._parse_not_constraint(), 871 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 872 "ON": lambda self: ( 873 self._match(TokenType.UPDATE) 874 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 875 ) 876 or self.expression(exp.OnProperty, this=self._parse_id_var()), 877 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 878 "PERIOD": lambda self: self._parse_period_for_system_time(), 879 "PRIMARY KEY": lambda self: self._parse_primary_key(), 880 "REFERENCES": lambda self: self._parse_references(match=False), 881 "TITLE": lambda self: self.expression( 882 exp.TitleColumnConstraint, this=self._parse_var_or_string() 883 ), 884 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 885 "UNIQUE": lambda self: self._parse_unique(), 886 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 887 "WITH": lambda self: self.expression( 888 exp.Properties, expressions=self._parse_wrapped_properties() 889 ), 890 } 891 892 ALTER_PARSERS = { 893 "ADD": lambda self: self._parse_alter_table_add(), 894 "ALTER": lambda self: self._parse_alter_table_alter(), 895 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 896 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 897 "DROP": lambda self: self._parse_alter_table_drop(), 898 "RENAME": lambda self: self._parse_alter_table_rename(), 899 } 900 901 SCHEMA_UNNAMED_CONSTRAINTS = { 902 "CHECK", 903 "EXCLUDE", 904 "FOREIGN KEY", 905 "LIKE", 906 "PERIOD", 907 "PRIMARY KEY", 908 "UNIQUE", 909 } 910 911 NO_PAREN_FUNCTION_PARSERS = { 912 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 913 "CASE": lambda self: self._parse_case(), 914 "IF": lambda self: self._parse_if(), 915 "NEXT": lambda self: self._parse_next_value_for(), 916 } 917 918 INVALID_FUNC_NAME_TOKENS = { 919 TokenType.IDENTIFIER, 920 TokenType.STRING, 921 } 922 923 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 924 925 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 926 927 FUNCTION_PARSERS = { 928 "CAST": lambda self: 
self._parse_cast(self.STRICT_CAST), 929 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 930 "DECODE": lambda self: self._parse_decode(), 931 "EXTRACT": lambda self: self._parse_extract(), 932 "JSON_OBJECT": lambda self: self._parse_json_object(), 933 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 934 "JSON_TABLE": lambda self: self._parse_json_table(), 935 "MATCH": lambda self: self._parse_match_against(), 936 "OPENJSON": lambda self: self._parse_open_json(), 937 "POSITION": lambda self: self._parse_position(), 938 "PREDICT": lambda self: self._parse_predict(), 939 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 940 "STRING_AGG": lambda self: self._parse_string_agg(), 941 "SUBSTRING": lambda self: self._parse_substring(), 942 "TRIM": lambda self: self._parse_trim(), 943 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 944 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 945 } 946 947 QUERY_MODIFIER_PARSERS = { 948 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 949 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 950 TokenType.WHERE: lambda self: ("where", self._parse_where()), 951 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 952 TokenType.HAVING: lambda self: ("having", self._parse_having()), 953 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 954 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 955 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 956 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 957 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 958 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 959 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 960 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 961 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 962 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 963 TokenType.CLUSTER_BY: lambda self: ( 964 "cluster", 965 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 966 ), 967 TokenType.DISTRIBUTE_BY: lambda self: ( 968 "distribute", 969 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 970 ), 971 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 972 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 973 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 974 } 975 976 SET_PARSERS = { 977 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 978 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 979 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 980 "TRANSACTION": lambda self: self._parse_set_transaction(), 981 } 982 983 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 984 985 TYPE_LITERAL_PARSERS = { 986 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 987 } 988 989 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 990 991 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 992 993 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 994 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 995 "ISOLATION": ( 996 ("LEVEL", "REPEATABLE", "READ"), 997 ("LEVEL", "READ", "COMMITTED"), 998 ("LEVEL", "READ", "UNCOMITTED"), 999 ("LEVEL", "SERIALIZABLE"), 
1000 ), 1001 "READ": ("WRITE", "ONLY"), 1002 } 1003 1004 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1005 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1006 ) 1007 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1008 1009 CREATE_SEQUENCE: OPTIONS_TYPE = { 1010 "SCALE": ("EXTEND", "NOEXTEND"), 1011 "SHARD": ("EXTEND", "NOEXTEND"), 1012 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1013 **dict.fromkeys( 1014 ( 1015 "SESSION", 1016 "GLOBAL", 1017 "KEEP", 1018 "NOKEEP", 1019 "ORDER", 1020 "NOORDER", 1021 "NOCACHE", 1022 "CYCLE", 1023 "NOCYCLE", 1024 "NOMINVALUE", 1025 "NOMAXVALUE", 1026 "NOSCALE", 1027 "NOSHARD", 1028 ), 1029 tuple(), 1030 ), 1031 } 1032 1033 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1034 1035 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1036 1037 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1038 1039 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1040 1041 CLONE_KEYWORDS = {"CLONE", "COPY"} 1042 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1043 1044 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1045 1046 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1047 1048 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1049 1050 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1051 1052 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1053 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1054 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1055 1056 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1057 1058 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1059 1060 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1061 1062 DISTINCT_TOKENS = {TokenType.DISTINCT} 1063 1064 NULL_TOKENS = {TokenType.NULL} 1065 1066 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1067 1068 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1069 1070 STRICT_CAST = True 1071 1072 PREFIXED_PIVOT_COLUMNS = False 1073 IDENTIFY_PIVOT_STRINGS = False 1074 1075 LOG_DEFAULTS_TO_LN = False 1076 1077 # Whether ADD is present for each column added by ALTER TABLE 1078 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1079 1080 # Whether the table sample clause expects CSV syntax 1081 TABLESAMPLE_CSV = False 1082 1083 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1084 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1085 1086 # Whether the TRIM function expects the characters to trim as its first argument 1087 TRIM_PATTERN_FIRST = False 1088 1089 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1090 STRING_ALIASES = False 1091 1092 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1093 MODIFIERS_ATTACHED_TO_UNION = True 1094 UNION_MODIFIERS = {"order", "limit", "offset"} 1095 1096 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1097 NO_PAREN_IF_COMMANDS = True 1098 1099 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1100 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1101 1102 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 
1103 # If this is True and '(' is not found, the keyword will be treated as an identifier 1104 VALUES_FOLLOWED_BY_PAREN = True 1105 1106 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1107 SUPPORTS_IMPLICIT_UNNEST = False 1108 1109 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1110 INTERVAL_SPANS = True 1111 1112 # Whether a PARTITION clause can follow a table reference 1113 SUPPORTS_PARTITION_SELECTION = False 1114 1115 __slots__ = ( 1116 "error_level", 1117 "error_message_context", 1118 "max_errors", 1119 "dialect", 1120 "sql", 1121 "errors", 1122 "_tokens", 1123 "_index", 1124 "_curr", 1125 "_next", 1126 "_prev", 1127 "_prev_comments", 1128 ) 1129 1130 # Autofilled 1131 SHOW_TRIE: t.Dict = {} 1132 SET_TRIE: t.Dict = {} 1133 1134 def __init__( 1135 self, 1136 error_level: t.Optional[ErrorLevel] = None, 1137 error_message_context: int = 100, 1138 max_errors: int = 3, 1139 dialect: DialectType = None, 1140 ): 1141 from sqlglot.dialects import Dialect 1142 1143 self.error_level = error_level or ErrorLevel.IMMEDIATE 1144 self.error_message_context = error_message_context 1145 self.max_errors = max_errors 1146 self.dialect = Dialect.get_or_raise(dialect) 1147 self.reset() 1148 1149 def reset(self): 1150 self.sql = "" 1151 self.errors = [] 1152 self._tokens = [] 1153 self._index = 0 1154 self._curr = None 1155 self._next = None 1156 self._prev = None 1157 self._prev_comments = None 1158 1159 def parse( 1160 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1161 ) -> t.List[t.Optional[exp.Expression]]: 1162 """ 1163 Parses a list of tokens and returns a list of syntax trees, one tree 1164 per parsed SQL statement. 1165 1166 Args: 1167 raw_tokens: The list of tokens. 1168 sql: The original SQL string, used to produce helpful debug messages. 1169 1170 Returns: 1171 The list of the produced syntax trees. 1172 """ 1173 return self._parse( 1174 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1175 ) 1176 1177 def parse_into( 1178 self, 1179 expression_types: exp.IntoType, 1180 raw_tokens: t.List[Token], 1181 sql: t.Optional[str] = None, 1182 ) -> t.List[t.Optional[exp.Expression]]: 1183 """ 1184 Parses a list of tokens into a given Expression type. If a collection of Expression 1185 types is given instead, this method will try to parse the token list into each one 1186 of them, stopping at the first for which the parsing succeeds. 1187 1188 Args: 1189 expression_types: The expression type(s) to try and parse the token list into. 1190 raw_tokens: The list of tokens. 1191 sql: The original SQL string, used to produce helpful debug messages. 1192 1193 Returns: 1194 The target Expression. 
1195 """ 1196 errors = [] 1197 for expression_type in ensure_list(expression_types): 1198 parser = self.EXPRESSION_PARSERS.get(expression_type) 1199 if not parser: 1200 raise TypeError(f"No parser registered for {expression_type}") 1201 1202 try: 1203 return self._parse(parser, raw_tokens, sql) 1204 except ParseError as e: 1205 e.errors[0]["into_expression"] = expression_type 1206 errors.append(e) 1207 1208 raise ParseError( 1209 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1210 errors=merge_errors(errors), 1211 ) from errors[-1] 1212 1213 def _parse( 1214 self, 1215 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1216 raw_tokens: t.List[Token], 1217 sql: t.Optional[str] = None, 1218 ) -> t.List[t.Optional[exp.Expression]]: 1219 self.reset() 1220 self.sql = sql or "" 1221 1222 total = len(raw_tokens) 1223 chunks: t.List[t.List[Token]] = [[]] 1224 1225 for i, token in enumerate(raw_tokens): 1226 if token.token_type == TokenType.SEMICOLON: 1227 if i < total - 1: 1228 chunks.append([]) 1229 else: 1230 chunks[-1].append(token) 1231 1232 expressions = [] 1233 1234 for tokens in chunks: 1235 self._index = -1 1236 self._tokens = tokens 1237 self._advance() 1238 1239 expressions.append(parse_method(self)) 1240 1241 if self._index < len(self._tokens): 1242 self.raise_error("Invalid expression / Unexpected token") 1243 1244 self.check_errors() 1245 1246 return expressions 1247 1248 def check_errors(self) -> None: 1249 """Logs or raises any found errors, depending on the chosen error level setting.""" 1250 if self.error_level == ErrorLevel.WARN: 1251 for error in self.errors: 1252 logger.error(str(error)) 1253 elif self.error_level == ErrorLevel.RAISE and self.errors: 1254 raise ParseError( 1255 concat_messages(self.errors, self.max_errors), 1256 errors=merge_errors(self.errors), 1257 ) 1258 1259 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1260 """ 1261 Appends an error in the list of recorded errors or raises it, depending on the chosen 1262 error level setting. 1263 """ 1264 token = token or self._curr or self._prev or Token.string("") 1265 start = token.start 1266 end = token.end + 1 1267 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1268 highlight = self.sql[start:end] 1269 end_context = self.sql[end : end + self.error_message_context] 1270 1271 error = ParseError.new( 1272 f"{message}. Line {token.line}, Col: {token.col}.\n" 1273 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1274 description=message, 1275 line=token.line, 1276 col=token.col, 1277 start_context=start_context, 1278 highlight=highlight, 1279 end_context=end_context, 1280 ) 1281 1282 if self.error_level == ErrorLevel.IMMEDIATE: 1283 raise error 1284 1285 self.errors.append(error) 1286 1287 def expression( 1288 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1289 ) -> E: 1290 """ 1291 Creates a new, validated Expression. 1292 1293 Args: 1294 exp_class: The expression class to instantiate. 1295 comments: An optional list of comments to attach to the expression. 1296 kwargs: The arguments to set for the expression along with their respective values. 1297 1298 Returns: 1299 The target expression. 
1300 """ 1301 instance = exp_class(**kwargs) 1302 instance.add_comments(comments) if comments else self._add_comments(instance) 1303 return self.validate_expression(instance) 1304 1305 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1306 if expression and self._prev_comments: 1307 expression.add_comments(self._prev_comments) 1308 self._prev_comments = None 1309 1310 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1311 """ 1312 Validates an Expression, making sure that all its mandatory arguments are set. 1313 1314 Args: 1315 expression: The expression to validate. 1316 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1317 1318 Returns: 1319 The validated expression. 1320 """ 1321 if self.error_level != ErrorLevel.IGNORE: 1322 for error_message in expression.error_messages(args): 1323 self.raise_error(error_message) 1324 1325 return expression 1326 1327 def _find_sql(self, start: Token, end: Token) -> str: 1328 return self.sql[start.start : end.end + 1] 1329 1330 def _is_connected(self) -> bool: 1331 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1332 1333 def _advance(self, times: int = 1) -> None: 1334 self._index += times 1335 self._curr = seq_get(self._tokens, self._index) 1336 self._next = seq_get(self._tokens, self._index + 1) 1337 1338 if self._index > 0: 1339 self._prev = self._tokens[self._index - 1] 1340 self._prev_comments = self._prev.comments 1341 else: 1342 self._prev = None 1343 self._prev_comments = None 1344 1345 def _retreat(self, index: int) -> None: 1346 if index != self._index: 1347 self._advance(index - self._index) 1348 1349 def _warn_unsupported(self) -> None: 1350 if len(self._tokens) <= 1: 1351 return 1352 1353 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1354 # interested in emitting a warning for the one being currently processed. 1355 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1356 1357 logger.warning( 1358 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1359 ) 1360 1361 def _parse_command(self) -> exp.Command: 1362 self._warn_unsupported() 1363 return self.expression( 1364 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1365 ) 1366 1367 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1368 """ 1369 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1370 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1371 the parser state accordingly 1372 """ 1373 index = self._index 1374 error_level = self.error_level 1375 1376 self.error_level = ErrorLevel.IMMEDIATE 1377 try: 1378 this = parse_method() 1379 except ParseError: 1380 this = None 1381 finally: 1382 if not this or retreat: 1383 self._retreat(index) 1384 self.error_level = error_level 1385 1386 return this 1387 1388 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1389 start = self._prev 1390 exists = self._parse_exists() if allow_exists else None 1391 1392 self._match(TokenType.ON) 1393 1394 materialized = self._match_text_seq("MATERIALIZED") 1395 kind = self._match_set(self.CREATABLES) and self._prev 1396 if not kind: 1397 return self._parse_as_command(start) 1398 1399 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1400 this = self._parse_user_defined_function(kind=kind.token_type) 1401 elif kind.token_type == TokenType.TABLE: 1402 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1403 elif kind.token_type == TokenType.COLUMN: 1404 this = self._parse_column() 1405 else: 1406 this = self._parse_id_var() 1407 1408 self._match(TokenType.IS) 1409 1410 return self.expression( 1411 exp.Comment, 1412 this=this, 1413 kind=kind.text, 1414 expression=self._parse_string(), 1415 exists=exists, 1416 materialized=materialized, 1417 ) 1418 1419 def _parse_to_table( 1420 self, 1421 ) -> exp.ToTableProperty: 1422 table = self._parse_table_parts(schema=True) 1423 return self.expression(exp.ToTableProperty, this=table) 1424 1425 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1426 def _parse_ttl(self) -> exp.Expression: 1427 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1428 this = self._parse_bitwise() 1429 1430 if self._match_text_seq("DELETE"): 1431 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1432 if self._match_text_seq("RECOMPRESS"): 1433 return self.expression( 1434 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1435 ) 1436 if self._match_text_seq("TO", "DISK"): 1437 return self.expression( 1438 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1439 ) 1440 if self._match_text_seq("TO", "VOLUME"): 1441 return self.expression( 1442 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1443 ) 1444 1445 return this 1446 1447 expressions = self._parse_csv(_parse_ttl_action) 1448 where = self._parse_where() 1449 group = self._parse_group() 1450 1451 aggregates = None 1452 if group and self._match(TokenType.SET): 1453 aggregates = self._parse_csv(self._parse_set_item) 1454 1455 return self.expression( 1456 exp.MergeTreeTTL, 1457 expressions=expressions, 1458 where=where, 1459 group=group, 1460 aggregates=aggregates, 1461 ) 1462 1463 def _parse_statement(self) -> t.Optional[exp.Expression]: 1464 if self._curr is None: 1465 return None 1466 1467 if self._match_set(self.STATEMENT_PARSERS): 1468 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1469 1470 if self._match_set(Tokenizer.COMMANDS): 1471 return self._parse_command() 1472 1473 expression = self._parse_expression() 1474 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1475 return self._parse_query_modifiers(expression) 1476 1477 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1478 start = self._prev 1479 
temporary = self._match(TokenType.TEMPORARY) 1480 materialized = self._match_text_seq("MATERIALIZED") 1481 1482 kind = self._match_set(self.CREATABLES) and self._prev.text 1483 if not kind: 1484 return self._parse_as_command(start) 1485 1486 if_exists = exists or self._parse_exists() 1487 table = self._parse_table_parts( 1488 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1489 ) 1490 1491 if self._match(TokenType.L_PAREN, advance=False): 1492 expressions = self._parse_wrapped_csv(self._parse_types) 1493 else: 1494 expressions = None 1495 1496 return self.expression( 1497 exp.Drop, 1498 comments=start.comments, 1499 exists=if_exists, 1500 this=table, 1501 expressions=expressions, 1502 kind=kind, 1503 temporary=temporary, 1504 materialized=materialized, 1505 cascade=self._match_text_seq("CASCADE"), 1506 constraints=self._match_text_seq("CONSTRAINTS"), 1507 purge=self._match_text_seq("PURGE"), 1508 ) 1509 1510 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1511 return ( 1512 self._match_text_seq("IF") 1513 and (not not_ or self._match(TokenType.NOT)) 1514 and self._match(TokenType.EXISTS) 1515 ) 1516 1517 def _parse_create(self) -> exp.Create | exp.Command: 1518 # Note: this can't be None because we've matched a statement parser 1519 start = self._prev 1520 comments = self._prev_comments 1521 1522 replace = ( 1523 start.token_type == TokenType.REPLACE 1524 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1525 or self._match_pair(TokenType.OR, TokenType.ALTER) 1526 ) 1527 1528 unique = self._match(TokenType.UNIQUE) 1529 1530 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1531 self._advance() 1532 1533 properties = None 1534 create_token = self._match_set(self.CREATABLES) and self._prev 1535 1536 if not create_token: 1537 # exp.Properties.Location.POST_CREATE 1538 properties = self._parse_properties() 1539 create_token = self._match_set(self.CREATABLES) and self._prev 1540 1541 if not properties or not create_token: 1542 return self._parse_as_command(start) 1543 1544 exists = self._parse_exists(not_=True) 1545 this = None 1546 expression: t.Optional[exp.Expression] = None 1547 indexes = None 1548 no_schema_binding = None 1549 begin = None 1550 end = None 1551 clone = None 1552 1553 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1554 nonlocal properties 1555 if properties and temp_props: 1556 properties.expressions.extend(temp_props.expressions) 1557 elif temp_props: 1558 properties = temp_props 1559 1560 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1561 this = self._parse_user_defined_function(kind=create_token.token_type) 1562 1563 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1564 extend_props(self._parse_properties()) 1565 1566 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1567 1568 if not expression: 1569 if self._match(TokenType.COMMAND): 1570 expression = self._parse_as_command(self._prev) 1571 else: 1572 begin = self._match(TokenType.BEGIN) 1573 return_ = self._match_text_seq("RETURN") 1574 1575 if self._match(TokenType.STRING, advance=False): 1576 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1577 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1578 expression = self._parse_string() 1579 extend_props(self._parse_properties()) 1580 else: 1581 expression = self._parse_statement() 1582 1583 end = 
self._match_text_seq("END") 1584 1585 if return_: 1586 expression = self.expression(exp.Return, this=expression) 1587 elif create_token.token_type == TokenType.INDEX: 1588 this = self._parse_index(index=self._parse_id_var()) 1589 elif create_token.token_type in self.DB_CREATABLES: 1590 table_parts = self._parse_table_parts( 1591 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1592 ) 1593 1594 # exp.Properties.Location.POST_NAME 1595 self._match(TokenType.COMMA) 1596 extend_props(self._parse_properties(before=True)) 1597 1598 this = self._parse_schema(this=table_parts) 1599 1600 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1601 extend_props(self._parse_properties()) 1602 1603 self._match(TokenType.ALIAS) 1604 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1605 # exp.Properties.Location.POST_ALIAS 1606 extend_props(self._parse_properties()) 1607 1608 if create_token.token_type == TokenType.SEQUENCE: 1609 expression = self._parse_types() 1610 extend_props(self._parse_properties()) 1611 else: 1612 expression = self._parse_ddl_select() 1613 1614 if create_token.token_type == TokenType.TABLE: 1615 # exp.Properties.Location.POST_EXPRESSION 1616 extend_props(self._parse_properties()) 1617 1618 indexes = [] 1619 while True: 1620 index = self._parse_index() 1621 1622 # exp.Properties.Location.POST_INDEX 1623 extend_props(self._parse_properties()) 1624 1625 if not index: 1626 break 1627 else: 1628 self._match(TokenType.COMMA) 1629 indexes.append(index) 1630 elif create_token.token_type == TokenType.VIEW: 1631 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1632 no_schema_binding = True 1633 1634 shallow = self._match_text_seq("SHALLOW") 1635 1636 if self._match_texts(self.CLONE_KEYWORDS): 1637 copy = self._prev.text.lower() == "copy" 1638 clone = self.expression( 1639 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1640 ) 1641 1642 if self._curr: 1643 return self._parse_as_command(start) 1644 1645 return self.expression( 1646 exp.Create, 1647 comments=comments, 1648 this=this, 1649 kind=create_token.text.upper(), 1650 replace=replace, 1651 unique=unique, 1652 expression=expression, 1653 exists=exists, 1654 properties=properties, 1655 indexes=indexes, 1656 no_schema_binding=no_schema_binding, 1657 begin=begin, 1658 end=end, 1659 clone=clone, 1660 ) 1661 1662 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1663 seq = exp.SequenceProperties() 1664 1665 options = [] 1666 index = self._index 1667 1668 while self._curr: 1669 if self._match_text_seq("INCREMENT"): 1670 self._match_text_seq("BY") 1671 self._match_text_seq("=") 1672 seq.set("increment", self._parse_term()) 1673 elif self._match_text_seq("MINVALUE"): 1674 seq.set("minvalue", self._parse_term()) 1675 elif self._match_text_seq("MAXVALUE"): 1676 seq.set("maxvalue", self._parse_term()) 1677 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1678 self._match_text_seq("=") 1679 seq.set("start", self._parse_term()) 1680 elif self._match_text_seq("CACHE"): 1681 # T-SQL allows empty CACHE which is initialized dynamically 1682 seq.set("cache", self._parse_number() or True) 1683 elif self._match_text_seq("OWNED", "BY"): 1684 # "OWNED BY NONE" is the default 1685 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1686 else: 1687 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1688 if opt: 1689 options.append(opt) 1690 else: 1691 break 1692 1693 
seq.set("options", options if options else None) 1694 return None if self._index == index else seq 1695 1696 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1697 # only used for teradata currently 1698 self._match(TokenType.COMMA) 1699 1700 kwargs = { 1701 "no": self._match_text_seq("NO"), 1702 "dual": self._match_text_seq("DUAL"), 1703 "before": self._match_text_seq("BEFORE"), 1704 "default": self._match_text_seq("DEFAULT"), 1705 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1706 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1707 "after": self._match_text_seq("AFTER"), 1708 "minimum": self._match_texts(("MIN", "MINIMUM")), 1709 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1710 } 1711 1712 if self._match_texts(self.PROPERTY_PARSERS): 1713 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1714 try: 1715 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1716 except TypeError: 1717 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1718 1719 return None 1720 1721 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1722 return self._parse_wrapped_csv(self._parse_property) 1723 1724 def _parse_property(self) -> t.Optional[exp.Expression]: 1725 if self._match_texts(self.PROPERTY_PARSERS): 1726 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1727 1728 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1729 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1730 1731 if self._match_text_seq("COMPOUND", "SORTKEY"): 1732 return self._parse_sortkey(compound=True) 1733 1734 if self._match_text_seq("SQL", "SECURITY"): 1735 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1736 1737 index = self._index 1738 key = self._parse_column() 1739 1740 if not self._match(TokenType.EQ): 1741 self._retreat(index) 1742 return self._parse_sequence_properties() 1743 1744 return self.expression( 1745 exp.Property, 1746 this=key.to_dot() if isinstance(key, exp.Column) else key, 1747 value=self._parse_bitwise() or self._parse_var(any_token=True), 1748 ) 1749 1750 def _parse_stored(self) -> exp.FileFormatProperty: 1751 self._match(TokenType.ALIAS) 1752 1753 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1754 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1755 1756 return self.expression( 1757 exp.FileFormatProperty, 1758 this=( 1759 self.expression( 1760 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1761 ) 1762 if input_format or output_format 1763 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1764 ), 1765 ) 1766 1767 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1768 self._match(TokenType.EQ) 1769 self._match(TokenType.ALIAS) 1770 field = self._parse_field() 1771 if isinstance(field, exp.Identifier) and not field.quoted: 1772 field = exp.var(field) 1773 1774 return self.expression(exp_class, this=field, **kwargs) 1775 1776 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1777 properties = [] 1778 while True: 1779 if before: 1780 prop = self._parse_property_before() 1781 else: 1782 prop = self._parse_property() 1783 if not prop: 1784 break 1785 for p in ensure_list(prop): 1786 properties.append(p) 1787 1788 if properties: 1789 return self.expression(exp.Properties, expressions=properties) 1790 1791 return None 1792 1793 

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
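
    # Illustrative note (added commentary, not upstream source): for the Postgres grammar handled
    # by _parse_partitioned_of and _parse_partition_bound_spec, parsing
    # "CREATE TABLE m PARTITION OF t FOR VALUES FROM (1) TO (10)" with read="postgres" should
    # produce a PartitionedOfProperty whose expression is a PartitionBoundSpec with
    # from_expressions=[1] and to_expressions=[10].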
    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if not self._match_set(self.ID_VAR_TOKENS, advance=False):
            style = None
            self._retreat(self._index - 1)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
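
    # Illustrative note (added commentary, not upstream source): _parse_insert ties these pieces
    # together, so parsing "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING" with
    # read="postgres" is expected to yield an exp.Insert whose "conflict" arg is an
    # exp.OnConflict carrying the conflict key column "a".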
    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
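
    # Illustrative note (added commentary, not upstream source): the multiple-table branch means
    # sqlglot.parse_one("DELETE a FROM a JOIN b ON a.id = b.id", read="mysql") should populate
    # the Delete node's "tables" list with "a", while "this" holds the joined FROM target.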
    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
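
    # Illustrative note (added commentary, not upstream source): _parse_value models each VALUES
    # row as an exp.Tuple, so sqlglot.parse_one("SELECT * FROM (VALUES (1, 2)) AS v(a, b)")
    # should contain an exp.Values holding one two-element Tuple, and dialects that accept a bare
    # "VALUES 1, 2" get one single-element Tuple per row.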
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
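
    # Illustrative note (added commentary, not upstream source): _parse_with and _parse_cte attach
    # the CTE list to the statement that follows, so
    # sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x") should be an exp.Select whose
    # "with" arg is an exp.With containing a single exp.CTE aliased "x".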
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

            if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
                this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
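
    # Illustrative note (added commentary, not upstream source): the "limit" branch in
    # _parse_query_modifiers splits a combined clause into separate nodes, so
    # sqlglot.parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql") is expected to end up with
    # both a Limit (10) and a separate Offset (5) on the Select.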
    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
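
    # Illustrative note (added commentary, not upstream source): the PATTERN body is captured
    # verbatim by balancing parentheses, so in a query like
    # "SELECT * FROM t MATCH_RECOGNIZE (PATTERN (a b+) DEFINE b AS b.x > 0)" the resulting
    # MatchRecognize node's "pattern" should be the raw text "a b+" wrapped in an exp.Var.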
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
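
    # Illustrative note (added commentary, not upstream source): _parse_join_parts is why
    # sqlglot.parse_one("SELECT * FROM a NATURAL LEFT JOIN b") should produce an exp.Join with
    # method="NATURAL" and side="LEFT", whereas a bare comma between tables becomes a plain
    # exp.Join with no kind, side or method set.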
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
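
    # Illustrative note (added commentary, not upstream source): _parse_table_parts consumes dots
    # left to right, shifting parts outward, so sqlglot.parse_one("SELECT * FROM c.d.t") should
    # yield an exp.Table with this="t", db="d" and catalog="c"; even deeper chains nest the extra
    # parts into exp.Dot expressions.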
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
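
    # Illustrative note (added commentary, not upstream source): on dialects with
    # UNNEST_COLUMN_ONLY set (e.g. BigQuery), the table alias in "SELECT * FROM UNNEST(arr) AS x"
    # is reinterpreted as a column alias, which is what the alias juggling above implements;
    # WITH OFFSET additionally attaches an offset identifier that defaults to "offset".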
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()
        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
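
    # Illustrative note (added commentary, not upstream source): _parse_group accumulates clauses
    # in a loop, so sqlglot.parse_one("SELECT a, b FROM t GROUP BY GROUPING SETS ((a), (a, b))")
    # should give an exp.Group whose "grouping_sets" holds two tuples, while MySQL-style
    # "GROUP BY a WITH ROLLUP" records a bare rollup flag instead of a column list.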
    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        return self.expression(exp.Interval, this=this, unit=unit)
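    # Example (illustrative): INTERVAL 5 DAY, INTERVAL '5' DAY and
    # INTERVAL '5 day' all normalize to an exp.Interval with this='5' and
    # unit=DAY via the canonicalization above, so any spelling can be
    # transpiled to whichever form a target dialect expects.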
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)
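    # Example (illustrative): INTERVAL '1' DAY + INTERVAL '2' HOUR, or the
    # plus-less form INTERVAL '1' DAY '2' HOUR, is folded by the loop above
    # into exp.Add(INTERVAL '1' DAY, INTERVAL '2' HOUR), i.e. a sum of
    # single-unit intervals.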
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
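    # Example (illustrative): DECIMAL(10, 2) takes the L_PAREN branch above
    # (expressions=[10, 2]), ARRAY<INT> takes the nested LT/GT branch, and a
    # trailing [] as in INT[] is wrapped into an ARRAY data type by the final
    # bracket-pair loop.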
self._match_text_seq("AT", "TIME", "ZONE"): 4210 return this 4211 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4212 4213 def _parse_column(self) -> t.Optional[exp.Expression]: 4214 this = self._parse_column_reference() 4215 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4216 4217 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4218 this = self._parse_field() 4219 if ( 4220 not this 4221 and self._match(TokenType.VALUES, advance=False) 4222 and self.VALUES_FOLLOWED_BY_PAREN 4223 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4224 ): 4225 this = self._parse_id_var() 4226 4227 if isinstance(this, exp.Identifier): 4228 # We bubble up comments from the Identifier to the Column 4229 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4230 4231 return this 4232 4233 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4234 this = self._parse_bracket(this) 4235 4236 while self._match_set(self.COLUMN_OPERATORS): 4237 op_token = self._prev.token_type 4238 op = self.COLUMN_OPERATORS.get(op_token) 4239 4240 if op_token == TokenType.DCOLON: 4241 field = self._parse_types() 4242 if not field: 4243 self.raise_error("Expected type") 4244 elif op and self._curr: 4245 field = self._parse_column_reference() 4246 else: 4247 field = self._parse_field(any_token=True, anonymous_func=True) 4248 4249 if isinstance(field, exp.Func) and this: 4250 # bigquery allows function calls like x.y.count(...) 4251 # SAFE.SUBSTR(...) 4252 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4253 this = exp.replace_tree( 4254 this, 4255 lambda n: ( 4256 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4257 if n.table 4258 else n.this 4259 ) 4260 if isinstance(n, exp.Column) 4261 else n, 4262 ) 4263 4264 if op: 4265 this = op(self, this, field) 4266 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4267 this = self.expression( 4268 exp.Column, 4269 this=field, 4270 table=this.this, 4271 db=this.args.get("table"), 4272 catalog=this.args.get("db"), 4273 ) 4274 else: 4275 this = self.expression(exp.Dot, this=this, expression=field) 4276 this = self._parse_bracket(this) 4277 return this 4278 4279 def _parse_primary(self) -> t.Optional[exp.Expression]: 4280 if self._match_set(self.PRIMARY_PARSERS): 4281 token_type = self._prev.token_type 4282 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4283 4284 if token_type == TokenType.STRING: 4285 expressions = [primary] 4286 while self._match(TokenType.STRING): 4287 expressions.append(exp.Literal.string(self._prev.text)) 4288 4289 if len(expressions) > 1: 4290 return self.expression(exp.Concat, expressions=expressions) 4291 4292 return primary 4293 4294 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4295 return exp.Literal.number(f"0.{self._prev.text}") 4296 4297 if self._match(TokenType.L_PAREN): 4298 comments = self._prev_comments 4299 query = self._parse_select() 4300 4301 if query: 4302 expressions = [query] 4303 else: 4304 expressions = self._parse_expressions() 4305 4306 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4307 4308 if isinstance(this, exp.UNWRAPPED_QUERIES): 4309 this = self._parse_set_operations( 4310 self._parse_subquery(this=this, parse_alias=False) 4311 ) 4312 elif isinstance(this, exp.Subquery): 4313 this = self._parse_subquery( 4314 this=self._parse_set_operations(this), parse_alias=False 
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
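    # Example (illustrative): the ODBC escape "{fn CONCAT('a', 'b')}" is
    # unwrapped here; the braces and FN marker are consumed and the inner
    # call parses exactly like a plain CONCAT('a', 'b').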
"dialect" in function.__code__.co_varnames: 4435 func = function(args, dialect=self.dialect) 4436 else: 4437 func = function(args) 4438 4439 func = self.validate_expression(func, args) 4440 if not self.dialect.NORMALIZE_FUNCTIONS: 4441 func.meta["name"] = this 4442 4443 this = func 4444 else: 4445 if token_type == TokenType.IDENTIFIER: 4446 this = exp.Identifier(this=this, quoted=True) 4447 this = self.expression(exp.Anonymous, this=this, expressions=args) 4448 4449 if isinstance(this, exp.Expression): 4450 this.add_comments(comments) 4451 4452 self._match_r_paren(this) 4453 return self._parse_window(this) 4454 4455 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4456 transformed = [] 4457 4458 for e in expressions: 4459 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4460 if isinstance(e, exp.Alias): 4461 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4462 4463 if not isinstance(e, exp.PropertyEQ): 4464 e = self.expression( 4465 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4466 ) 4467 4468 if isinstance(e.this, exp.Column): 4469 e.this.replace(e.this.this) 4470 4471 transformed.append(e) 4472 4473 return transformed 4474 4475 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4476 return self._parse_column_def(self._parse_id_var()) 4477 4478 def _parse_user_defined_function( 4479 self, kind: t.Optional[TokenType] = None 4480 ) -> t.Optional[exp.Expression]: 4481 this = self._parse_id_var() 4482 4483 while self._match(TokenType.DOT): 4484 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4485 4486 if not self._match(TokenType.L_PAREN): 4487 return this 4488 4489 expressions = self._parse_csv(self._parse_function_parameter) 4490 self._match_r_paren() 4491 return self.expression( 4492 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4493 ) 4494 4495 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4496 literal = self._parse_primary() 4497 if literal: 4498 return self.expression(exp.Introducer, this=token.text, expression=literal) 4499 4500 return self.expression(exp.Identifier, this=token.text) 4501 4502 def _parse_session_parameter(self) -> exp.SessionParameter: 4503 kind = None 4504 this = self._parse_id_var() or self._parse_primary() 4505 4506 if this and self._match(TokenType.DOT): 4507 kind = this.name 4508 this = self._parse_var() or self._parse_primary() 4509 4510 return self.expression(exp.SessionParameter, this=this, kind=kind) 4511 4512 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4513 index = self._index 4514 4515 if self._match(TokenType.L_PAREN): 4516 expressions = t.cast( 4517 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4518 ) 4519 4520 if not self._match(TokenType.R_PAREN): 4521 self._retreat(index) 4522 else: 4523 expressions = [self._parse_id_var()] 4524 4525 if self._match_set(self.LAMBDAS): 4526 return self.LAMBDAS[self._prev.token_type](self, expressions) 4527 4528 self._retreat(index) 4529 4530 this: t.Optional[exp.Expression] 4531 4532 if self._match(TokenType.DISTINCT): 4533 this = self.expression( 4534 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4535 ) 4536 else: 4537 this = self._parse_select_or_expression(alias=alias) 4538 4539 return self._parse_limit( 4540 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4541 ) 4542 4543 def _parse_schema(self, this: 
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
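    # Example (illustrative): for INSERT INTO t (SELECT ...), the
    # SELECT_START_TOKENS check in _parse_schema backs off so the
    # parenthesized query is not mistaken for a column list, while
    # INSERT INTO t (a, b) still parses into an exp.Schema; likewise, in
    # _parse_column_def, "c AS (a + b) PERSISTED" yields a
    # ComputedColumnConstraint with persisted=True.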
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
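    # Example (illustrative): Postgres' STRING_AGG(x, ',' ORDER BY y) takes
    # the first branch above (the trailing ORDER BY is folded into the last
    # argument), while STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y) takes the
    # WITHIN GROUP branch; both normalize to exp.GroupConcat, which keeps
    # round-tripping to MySQL/SQLite GROUP_CONCAT simple.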
    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
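    # Worked example (illustrative): DECODE(x, 1, 'one', 'other') becomes
    # CASE WHEN x = 1 THEN 'one' ELSE 'other' END, while a NULL search such
    # as DECODE(x, NULL, 'missing') becomes CASE WHEN x IS NULL THEN
    # 'missing' END, per the IS NULL treatment described in the docstring.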
    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )
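    # Example (illustrative): an Oracle-style call such as
    # JSON_OBJECT(KEY 'a' VALUE 1 ABSENT ON NULL WITH UNIQUE KEYS) parses
    # into an exp.JSONObject with null_handling="ABSENT ON NULL" and
    # unique_keys=True; with agg=True the same grammar produces an
    # exp.JSONObjectAgg for JSON_OBJECTAGG calls.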
    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
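    # Example (illustrative; @json is a hypothetical T-SQL variable):
    # OPENJSON(@json, '$.items') WITH (id INT '$.id', raw NVARCHAR(MAX) '$.raw' AS JSON)
    # parses into an exp.OpenJSON whose expressions are OpenJSONColumnDef
    # nodes, the AS JSON suffix setting as_json=True on that column.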
self._parse_string() 5254 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5255 5256 return self.expression( 5257 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5258 ) 5259 5260 expressions = None 5261 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5262 self._match_l_paren() 5263 expressions = self._parse_csv(_parse_open_json_column_def) 5264 5265 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5266 5267 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5268 args = self._parse_csv(self._parse_bitwise) 5269 5270 if self._match(TokenType.IN): 5271 return self.expression( 5272 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5273 ) 5274 5275 if haystack_first: 5276 haystack = seq_get(args, 0) 5277 needle = seq_get(args, 1) 5278 else: 5279 needle = seq_get(args, 0) 5280 haystack = seq_get(args, 1) 5281 5282 return self.expression( 5283 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5284 ) 5285 5286 def _parse_predict(self) -> exp.Predict: 5287 self._match_text_seq("MODEL") 5288 this = self._parse_table() 5289 5290 self._match(TokenType.COMMA) 5291 self._match_text_seq("TABLE") 5292 5293 return self.expression( 5294 exp.Predict, 5295 this=this, 5296 expression=self._parse_table(), 5297 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5298 ) 5299 5300 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5301 args = self._parse_csv(self._parse_table) 5302 return exp.JoinHint(this=func_name.upper(), expressions=args) 5303 5304 def _parse_substring(self) -> exp.Substring: 5305 # Postgres supports the form: substring(string [from int] [for int]) 5306 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5307 5308 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5309 5310 if self._match(TokenType.FROM): 5311 args.append(self._parse_bitwise()) 5312 if self._match(TokenType.FOR): 5313 args.append(self._parse_bitwise()) 5314 5315 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5316 5317 def _parse_trim(self) -> exp.Trim: 5318 # https://www.w3resource.com/sql/character-functions/trim.php 5319 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5320 5321 position = None 5322 collation = None 5323 expression = None 5324 5325 if self._match_texts(self.TRIM_TYPES): 5326 position = self._prev.text.upper() 5327 5328 this = self._parse_bitwise() 5329 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5330 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5331 expression = self._parse_bitwise() 5332 5333 if invert_order: 5334 this, expression = expression, this 5335 5336 if self._match(TokenType.COLLATE): 5337 collation = self._parse_bitwise() 5338 5339 return self.expression( 5340 exp.Trim, this=this, position=position, expression=expression, collation=collation 5341 ) 5342 5343 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5344 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5345 5346 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5347 return self._parse_window(self._parse_id_var(), alias=True) 5348 5349 def _parse_respect_or_ignore_nulls( 5350 self, this: t.Optional[exp.Expression] 5351 ) -> t.Optional[exp.Expression]: 5352 if self._match_text_seq("IGNORE", "NULLS"): 5353 return self.expression(exp.IgnoreNulls, this=this) 5354 if 
self._match_text_seq("RESPECT", "NULLS"): 5355 return self.expression(exp.RespectNulls, this=this) 5356 return this 5357 5358 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5359 if self._match(TokenType.HAVING): 5360 self._match_texts(("MAX", "MIN")) 5361 max = self._prev.text.upper() != "MIN" 5362 return self.expression( 5363 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5364 ) 5365 5366 return this 5367 5368 def _parse_window( 5369 self, this: t.Optional[exp.Expression], alias: bool = False 5370 ) -> t.Optional[exp.Expression]: 5371 func = this 5372 comments = func.comments if isinstance(func, exp.Expression) else None 5373 5374 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5375 self._match(TokenType.WHERE) 5376 this = self.expression( 5377 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5378 ) 5379 self._match_r_paren() 5380 5381 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5382 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5383 if self._match_text_seq("WITHIN", "GROUP"): 5384 order = self._parse_wrapped(self._parse_order) 5385 this = self.expression(exp.WithinGroup, this=this, expression=order) 5386 5387 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5388 # Some dialects choose to implement and some do not. 5389 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5390 5391 # There is some code above in _parse_lambda that handles 5392 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5393 5394 # The below changes handle 5395 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5396 5397 # Oracle allows both formats 5398 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5399 # and Snowflake chose to do the same for familiarity 5400 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5401 if isinstance(this, exp.AggFunc): 5402 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5403 5404 if ignore_respect and ignore_respect is not this: 5405 ignore_respect.replace(ignore_respect.this) 5406 this = self.expression(ignore_respect.__class__, this=this) 5407 5408 this = self._parse_respect_or_ignore_nulls(this) 5409 5410 # bigquery select from window x AS (partition by ...) 
5411 if alias: 5412 over = None 5413 self._match(TokenType.ALIAS) 5414 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5415 return this 5416 else: 5417 over = self._prev.text.upper() 5418 5419 if comments and isinstance(func, exp.Expression): 5420 func.pop_comments() 5421 5422 if not self._match(TokenType.L_PAREN): 5423 return self.expression( 5424 exp.Window, 5425 comments=comments, 5426 this=this, 5427 alias=self._parse_id_var(False), 5428 over=over, 5429 ) 5430 5431 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5432 5433 first = self._match(TokenType.FIRST) 5434 if self._match_text_seq("LAST"): 5435 first = False 5436 5437 partition, order = self._parse_partition_and_order() 5438 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5439 5440 if kind: 5441 self._match(TokenType.BETWEEN) 5442 start = self._parse_window_spec() 5443 self._match(TokenType.AND) 5444 end = self._parse_window_spec() 5445 5446 spec = self.expression( 5447 exp.WindowSpec, 5448 kind=kind, 5449 start=start["value"], 5450 start_side=start["side"], 5451 end=end["value"], 5452 end_side=end["side"], 5453 ) 5454 else: 5455 spec = None 5456 5457 self._match_r_paren() 5458 5459 window = self.expression( 5460 exp.Window, 5461 comments=comments, 5462 this=this, 5463 partition_by=partition, 5464 order=order, 5465 spec=spec, 5466 alias=window_alias, 5467 over=over, 5468 first=first, 5469 ) 5470 5471 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5472 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5473 return self._parse_window(window, alias=alias) 5474 5475 return window 5476 5477 def _parse_partition_and_order( 5478 self, 5479 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5480 return self._parse_partition_by(), self._parse_order() 5481 5482 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5483 self._match(TokenType.BETWEEN) 5484 5485 return { 5486 "value": ( 5487 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5488 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5489 or self._parse_bitwise() 5490 ), 5491 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5492 } 5493 5494 def _parse_alias( 5495 self, this: t.Optional[exp.Expression], explicit: bool = False 5496 ) -> t.Optional[exp.Expression]: 5497 any_token = self._match(TokenType.ALIAS) 5498 comments = self._prev_comments or [] 5499 5500 if explicit and not any_token: 5501 return this 5502 5503 if self._match(TokenType.L_PAREN): 5504 aliases = self.expression( 5505 exp.Aliases, 5506 comments=comments, 5507 this=this, 5508 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5509 ) 5510 self._match_r_paren(aliases) 5511 return aliases 5512 5513 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5514 self.STRING_ALIASES and self._parse_string_as_identifier() 5515 ) 5516 5517 if alias: 5518 comments.extend(alias.pop_comments()) 5519 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5520 column = this.this 5521 5522 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5523 if not this.comments and column and column.comments: 5524 this.comments = column.pop_comments() 5525 5526 return this 5527 5528 def _parse_id_var( 5529 self, 5530 any_token: bool = True, 5531 tokens: t.Optional[t.Collection[TokenType]] = None, 5532 ) -> t.Optional[exp.Expression]: 5533 expression = self._parse_identifier() 5534 if 
not expression and ( 5535 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5536 ): 5537 quoted = self._prev.token_type == TokenType.STRING 5538 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5539 5540 return expression 5541 5542 def _parse_string(self) -> t.Optional[exp.Expression]: 5543 if self._match_set(self.STRING_PARSERS): 5544 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5545 return self._parse_placeholder() 5546 5547 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5548 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5549 5550 def _parse_number(self) -> t.Optional[exp.Expression]: 5551 if self._match_set(self.NUMERIC_PARSERS): 5552 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5553 return self._parse_placeholder() 5554 5555 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5556 if self._match(TokenType.IDENTIFIER): 5557 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5558 return self._parse_placeholder() 5559 5560 def _parse_var( 5561 self, 5562 any_token: bool = False, 5563 tokens: t.Optional[t.Collection[TokenType]] = None, 5564 upper: bool = False, 5565 ) -> t.Optional[exp.Expression]: 5566 if ( 5567 (any_token and self._advance_any()) 5568 or self._match(TokenType.VAR) 5569 or (self._match_set(tokens) if tokens else False) 5570 ): 5571 return self.expression( 5572 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5573 ) 5574 return self._parse_placeholder() 5575 5576 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5577 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5578 self._advance() 5579 return self._prev 5580 return None 5581 5582 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5583 return self._parse_var() or self._parse_string() 5584 5585 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5586 return self._parse_primary() or self._parse_var(any_token=True) 5587 5588 def _parse_null(self) -> t.Optional[exp.Expression]: 5589 if self._match_set(self.NULL_TOKENS): 5590 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5591 return self._parse_placeholder() 5592 5593 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5594 if self._match(TokenType.TRUE): 5595 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5596 if self._match(TokenType.FALSE): 5597 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5598 return self._parse_placeholder() 5599 5600 def _parse_star(self) -> t.Optional[exp.Expression]: 5601 if self._match(TokenType.STAR): 5602 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5603 return self._parse_placeholder() 5604 5605 def _parse_parameter(self) -> exp.Parameter: 5606 self._match(TokenType.L_BRACE) 5607 this = self._parse_identifier() or self._parse_primary_or_var() 5608 expression = self._match(TokenType.COLON) and ( 5609 self._parse_identifier() or self._parse_primary_or_var() 5610 ) 5611 self._match(TokenType.R_BRACE) 5612 return self.expression(exp.Parameter, this=this, expression=expression) 5613 5614 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5615 if self._match_set(self.PLACEHOLDER_PARSERS): 5616 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5617 if placeholder: 5618 return placeholder 5619 self._advance(-1) 5620 return None 5621 5622 def 
_parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5623 if not self._match(TokenType.EXCEPT): 5624 return None 5625 if self._match(TokenType.L_PAREN, advance=False): 5626 return self._parse_wrapped_csv(self._parse_column) 5627 5628 except_column = self._parse_column() 5629 return [except_column] if except_column else None 5630 5631 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5632 if not self._match(TokenType.REPLACE): 5633 return None 5634 if self._match(TokenType.L_PAREN, advance=False): 5635 return self._parse_wrapped_csv(self._parse_expression) 5636 5637 replace_expression = self._parse_expression() 5638 return [replace_expression] if replace_expression else None 5639 5640 def _parse_csv( 5641 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5642 ) -> t.List[exp.Expression]: 5643 parse_result = parse_method() 5644 items = [parse_result] if parse_result is not None else [] 5645 5646 while self._match(sep): 5647 self._add_comments(parse_result) 5648 parse_result = parse_method() 5649 if parse_result is not None: 5650 items.append(parse_result) 5651 5652 return items 5653 5654 def _parse_tokens( 5655 self, parse_method: t.Callable, expressions: t.Dict 5656 ) -> t.Optional[exp.Expression]: 5657 this = parse_method() 5658 5659 while self._match_set(expressions): 5660 this = self.expression( 5661 expressions[self._prev.token_type], 5662 this=this, 5663 comments=self._prev_comments, 5664 expression=parse_method(), 5665 ) 5666 5667 return this 5668 5669 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5670 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5671 5672 def _parse_wrapped_csv( 5673 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5674 ) -> t.List[exp.Expression]: 5675 return self._parse_wrapped( 5676 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5677 ) 5678 5679 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5680 wrapped = self._match(TokenType.L_PAREN) 5681 if not wrapped and not optional: 5682 self.raise_error("Expecting (") 5683 parse_result = parse_method() 5684 if wrapped: 5685 self._match_r_paren() 5686 return parse_result 5687 5688 def _parse_expressions(self) -> t.List[exp.Expression]: 5689 return self._parse_csv(self._parse_expression) 5690 5691 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5692 return self._parse_select() or self._parse_set_operations( 5693 self._parse_expression() if alias else self._parse_conjunction() 5694 ) 5695 5696 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5697 return self._parse_query_modifiers( 5698 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5699 ) 5700 5701 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5702 this = None 5703 if self._match_texts(self.TRANSACTION_KIND): 5704 this = self._prev.text 5705 5706 self._match_texts(("TRANSACTION", "WORK")) 5707 5708 modes = [] 5709 while True: 5710 mode = [] 5711 while self._match(TokenType.VAR): 5712 mode.append(self._prev.text) 5713 5714 if mode: 5715 modes.append(" ".join(mode)) 5716 if not self._match(TokenType.COMMA): 5717 break 5718 5719 return self.expression(exp.Transaction, this=this, modes=modes) 5720 5721 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5722 chain = None 5723 savepoint = None 5724 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5725 5726 
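# Note: the _parse_except/_parse_replace helpers above feed the "except" and
# "replace" args of exp.Star for BigQuery-style projections. A minimal
# sketch, assuming the top-level sqlglot entry points:

import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("SELECT * EXCEPT (a, b) FROM t", read="bigquery")
print([col.name for col in node.find(exp.Star).args["except"]])
# expected: ['a', 'b']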
self._match_texts(("TRANSACTION", "WORK")) 5727 5728 if self._match_text_seq("TO"): 5729 self._match_text_seq("SAVEPOINT") 5730 savepoint = self._parse_id_var() 5731 5732 if self._match(TokenType.AND): 5733 chain = not self._match_text_seq("NO") 5734 self._match_text_seq("CHAIN") 5735 5736 if is_rollback: 5737 return self.expression(exp.Rollback, savepoint=savepoint) 5738 5739 return self.expression(exp.Commit, chain=chain) 5740 5741 def _parse_refresh(self) -> exp.Refresh: 5742 self._match(TokenType.TABLE) 5743 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5744 5745 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5746 if not self._match_text_seq("ADD"): 5747 return None 5748 5749 self._match(TokenType.COLUMN) 5750 exists_column = self._parse_exists(not_=True) 5751 expression = self._parse_field_def() 5752 5753 if expression: 5754 expression.set("exists", exists_column) 5755 5756 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5757 if self._match_texts(("FIRST", "AFTER")): 5758 position = self._prev.text 5759 column_position = self.expression( 5760 exp.ColumnPosition, this=self._parse_column(), position=position 5761 ) 5762 expression.set("position", column_position) 5763 5764 return expression 5765 5766 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5767 drop = self._match(TokenType.DROP) and self._parse_drop() 5768 if drop and not isinstance(drop, exp.Command): 5769 drop.set("kind", drop.args.get("kind", "COLUMN")) 5770 return drop 5771 5772 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5773 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5774 return self.expression( 5775 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5776 ) 5777 5778 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5779 index = self._index - 1 5780 5781 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5782 return self._parse_csv( 5783 lambda: self.expression( 5784 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5785 ) 5786 ) 5787 5788 self._retreat(index) 5789 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5790 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5791 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5792 5793 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5794 self._match(TokenType.COLUMN) 5795 column = self._parse_field(any_token=True) 5796 5797 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5798 return self.expression(exp.AlterColumn, this=column, drop=True) 5799 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5800 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5801 if self._match(TokenType.COMMENT): 5802 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5803 5804 self._match_text_seq("SET", "DATA") 5805 self._match_text_seq("TYPE") 5806 return self.expression( 5807 exp.AlterColumn, 5808 this=column, 5809 dtype=self._parse_types(), 5810 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5811 using=self._match(TokenType.USING) and self._parse_conjunction(), 5812 ) 5813 5814 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5815 index = self._index - 1 5816 5817 partition_exists = self._parse_exists() 5818 if 
self._match(TokenType.PARTITION, advance=False): 5819 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5820 5821 self._retreat(index) 5822 return self._parse_csv(self._parse_drop_column) 5823 5824 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5825 if self._match(TokenType.COLUMN): 5826 exists = self._parse_exists() 5827 old_column = self._parse_column() 5828 to = self._match_text_seq("TO") 5829 new_column = self._parse_column() 5830 5831 if old_column is None or to is None or new_column is None: 5832 return None 5833 5834 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5835 5836 self._match_text_seq("TO") 5837 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5838 5839 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5840 start = self._prev 5841 5842 if not self._match(TokenType.TABLE): 5843 return self._parse_as_command(start) 5844 5845 exists = self._parse_exists() 5846 only = self._match_text_seq("ONLY") 5847 this = self._parse_table(schema=True) 5848 5849 if self._next: 5850 self._advance() 5851 5852 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5853 if parser: 5854 actions = ensure_list(parser(self)) 5855 options = self._parse_csv(self._parse_property) 5856 5857 if not self._curr and actions: 5858 return self.expression( 5859 exp.AlterTable, 5860 this=this, 5861 exists=exists, 5862 actions=actions, 5863 only=only, 5864 options=options, 5865 ) 5866 5867 return self._parse_as_command(start) 5868 5869 def _parse_merge(self) -> exp.Merge: 5870 self._match(TokenType.INTO) 5871 target = self._parse_table() 5872 5873 if target and self._match(TokenType.ALIAS, advance=False): 5874 target.set("alias", self._parse_table_alias()) 5875 5876 self._match(TokenType.USING) 5877 using = self._parse_table() 5878 5879 self._match(TokenType.ON) 5880 on = self._parse_conjunction() 5881 5882 return self.expression( 5883 exp.Merge, 5884 this=target, 5885 using=using, 5886 on=on, 5887 expressions=self._parse_when_matched(), 5888 ) 5889 5890 def _parse_when_matched(self) -> t.List[exp.When]: 5891 whens = [] 5892 5893 while self._match(TokenType.WHEN): 5894 matched = not self._match(TokenType.NOT) 5895 self._match_text_seq("MATCHED") 5896 source = ( 5897 False 5898 if self._match_text_seq("BY", "TARGET") 5899 else self._match_text_seq("BY", "SOURCE") 5900 ) 5901 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5902 5903 self._match(TokenType.THEN) 5904 5905 if self._match(TokenType.INSERT): 5906 _this = self._parse_star() 5907 if _this: 5908 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5909 else: 5910 then = self.expression( 5911 exp.Insert, 5912 this=self._parse_value(), 5913 expression=self._match_text_seq("VALUES") and self._parse_value(), 5914 ) 5915 elif self._match(TokenType.UPDATE): 5916 expressions = self._parse_star() 5917 if expressions: 5918 then = self.expression(exp.Update, expressions=expressions) 5919 else: 5920 then = self.expression( 5921 exp.Update, 5922 expressions=self._match(TokenType.SET) 5923 and self._parse_csv(self._parse_equality), 5924 ) 5925 elif self._match(TokenType.DELETE): 5926 then = self.expression(exp.Var, this=self._prev.text) 5927 else: 5928 then = None 5929 5930 whens.append( 5931 self.expression( 5932 exp.When, 5933 matched=matched, 5934 source=source, 5935 condition=condition, 5936 then=then, 5937 ) 5938 ) 5939 return whens 5940 
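# A quick end-to-end check of the MERGE / WHEN [NOT] MATCHED parsing above.
# A minimal sketch, assuming the top-level sqlglot entry points:

import sqlglot
from sqlglot import exp

merge = sqlglot.parse_one(
    "MERGE INTO tgt USING src ON tgt.id = src.id "
    "WHEN MATCHED THEN UPDATE SET tgt.v = src.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (src.id, src.v)"
)
print([when.args.get("matched") for when in merge.find_all(exp.When)])
# expected: [True, False]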
5941 def _parse_show(self) -> t.Optional[exp.Expression]: 5942 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5943 if parser: 5944 return parser(self) 5945 return self._parse_as_command(self._prev) 5946 5947 def _parse_set_item_assignment( 5948 self, kind: t.Optional[str] = None 5949 ) -> t.Optional[exp.Expression]: 5950 index = self._index 5951 5952 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5953 return self._parse_set_transaction(global_=kind == "GLOBAL") 5954 5955 left = self._parse_primary() or self._parse_id_var() 5956 assignment_delimiter = self._match_texts(("=", "TO")) 5957 5958 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5959 self._retreat(index) 5960 return None 5961 5962 right = self._parse_statement() or self._parse_id_var() 5963 this = self.expression(exp.EQ, this=left, expression=right) 5964 5965 return self.expression(exp.SetItem, this=this, kind=kind) 5966 5967 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5968 self._match_text_seq("TRANSACTION") 5969 characteristics = self._parse_csv( 5970 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5971 ) 5972 return self.expression( 5973 exp.SetItem, 5974 expressions=characteristics, 5975 kind="TRANSACTION", 5976 **{"global": global_}, # type: ignore 5977 ) 5978 5979 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5980 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5981 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5982 5983 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5984 index = self._index 5985 set_ = self.expression( 5986 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5987 ) 5988 5989 if self._curr: 5990 self._retreat(index) 5991 return self._parse_as_command(self._prev) 5992 5993 return set_ 5994 5995 def _parse_var_from_options( 5996 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5997 ) -> t.Optional[exp.Var]: 5998 start = self._curr 5999 if not start: 6000 return None 6001 6002 option = start.text.upper() 6003 continuations = options.get(option) 6004 6005 index = self._index 6006 self._advance() 6007 for keywords in continuations or []: 6008 if isinstance(keywords, str): 6009 keywords = (keywords,) 6010 6011 if self._match_text_seq(*keywords): 6012 option = f"{option} {' '.join(keywords)}" 6013 break 6014 else: 6015 if continuations or continuations is None: 6016 if raise_unmatched: 6017 self.raise_error(f"Unknown option {option}") 6018 6019 self._retreat(index) 6020 return None 6021 6022 return exp.var(option) 6023 6024 def _parse_as_command(self, start: Token) -> exp.Command: 6025 while self._curr: 6026 self._advance() 6027 text = self._find_sql(start, self._prev) 6028 size = len(start.text) 6029 self._warn_unsupported() 6030 return exp.Command(this=text[:size], expression=text[size:]) 6031 6032 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6033 settings = [] 6034 6035 self._match_l_paren() 6036 kind = self._parse_id_var() 6037 6038 if self._match(TokenType.L_PAREN): 6039 while True: 6040 key = self._parse_id_var() 6041 value = self._parse_primary() 6042 6043 if not key and value is None: 6044 break 6045 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6046 self._match(TokenType.R_PAREN) 6047 6048 self._match_r_paren() 6049 6050 return self.expression( 6051 exp.DictProperty, 6052 this=this, 6053 
kind=kind.this if kind else None, 6054 settings=settings, 6055 ) 6056 6057 def _parse_dict_range(self, this: str) -> exp.DictRange: 6058 self._match_l_paren() 6059 has_min = self._match_text_seq("MIN") 6060 if has_min: 6061 min = self._parse_var() or self._parse_primary() 6062 self._match_text_seq("MAX") 6063 max = self._parse_var() or self._parse_primary() 6064 else: 6065 max = self._parse_var() or self._parse_primary() 6066 min = exp.Literal.number(0) 6067 self._match_r_paren() 6068 return self.expression(exp.DictRange, this=this, min=min, max=max) 6069 6070 def _parse_comprehension( 6071 self, this: t.Optional[exp.Expression] 6072 ) -> t.Optional[exp.Comprehension]: 6073 index = self._index 6074 expression = self._parse_column() 6075 if not self._match(TokenType.IN): 6076 self._retreat(index - 1) 6077 return None 6078 iterator = self._parse_column() 6079 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6080 return self.expression( 6081 exp.Comprehension, 6082 this=this, 6083 expression=expression, 6084 iterator=iterator, 6085 condition=condition, 6086 ) 6087 6088 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6089 if self._match(TokenType.HEREDOC_STRING): 6090 return self.expression(exp.Heredoc, this=self._prev.text) 6091 6092 if not self._match_text_seq("$"): 6093 return None 6094 6095 tags = ["$"] 6096 tag_text = None 6097 6098 if self._is_connected(): 6099 self._advance() 6100 tags.append(self._prev.text.upper()) 6101 else: 6102 self.raise_error("No closing $ found") 6103 6104 if tags[-1] != "$": 6105 if self._is_connected() and self._match_text_seq("$"): 6106 tag_text = tags[-1] 6107 tags.append("$") 6108 else: 6109 self.raise_error("No closing $ found") 6110 6111 heredoc_start = self._curr 6112 6113 while self._curr: 6114 if self._match_text_seq(*tags, advance=False): 6115 this = self._find_sql(heredoc_start, self._prev) 6116 self._advance(len(tags)) 6117 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6118 6119 self._advance() 6120 6121 self.raise_error(f"No closing {''.join(tags)} found") 6122 return None 6123 6124 def _find_parser( 6125 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6126 ) -> t.Optional[t.Callable]: 6127 if not self._curr: 6128 return None 6129 6130 index = self._index 6131 this = [] 6132 while True: 6133 # The current token might be multiple words 6134 curr = self._curr.text.upper() 6135 key = curr.split(" ") 6136 this.append(curr) 6137 6138 self._advance() 6139 result, trie = in_trie(trie, key) 6140 if result == TrieResult.FAILED: 6141 break 6142 6143 if result == TrieResult.EXISTS: 6144 subparser = parsers[" ".join(this)] 6145 return subparser 6146 6147 self._retreat(index) 6148 return None 6149 6150 def _match(self, token_type, advance=True, expression=None): 6151 if not self._curr: 6152 return None 6153 6154 if self._curr.token_type == token_type: 6155 if advance: 6156 self._advance() 6157 self._add_comments(expression) 6158 return True 6159 6160 return None 6161 6162 def _match_set(self, types, advance=True): 6163 if not self._curr: 6164 return None 6165 6166 if self._curr.token_type in types: 6167 if advance: 6168 self._advance() 6169 return True 6170 6171 return None 6172 6173 def _match_pair(self, token_type_a, token_type_b, advance=True): 6174 if not self._curr or not self._next: 6175 return None 6176 6177 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6178 if advance: 6179 self._advance(2) 6180 return True 6181 6182 return None 6183 6184 def _match_l_paren(self, 
expression: t.Optional[exp.Expression] = None) -> None: 6185 if not self._match(TokenType.L_PAREN, expression=expression): 6186 self.raise_error("Expecting (") 6187 6188 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6189 if not self._match(TokenType.R_PAREN, expression=expression): 6190 self.raise_error("Expecting )") 6191 6192 def _match_texts(self, texts, advance=True): 6193 if self._curr and self._curr.text.upper() in texts: 6194 if advance: 6195 self._advance() 6196 return True 6197 return None 6198 6199 def _match_text_seq(self, *texts, advance=True): 6200 index = self._index 6201 for text in texts: 6202 if self._curr and self._curr.text.upper() == text: 6203 self._advance() 6204 else: 6205 self._retreat(index) 6206 return None 6207 6208 if not advance: 6209 self._retreat(index) 6210 6211 return True 6212 6213 def _replace_lambda( 6214 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6215 ) -> t.Optional[exp.Expression]: 6216 if not node: 6217 return node 6218 6219 for column in node.find_all(exp.Column): 6220 if column.parts[0].name in lambda_variables: 6221 dot_or_id = column.to_dot() if column.table else column.this 6222 parent = column.parent 6223 6224 while isinstance(parent, exp.Dot): 6225 if not isinstance(parent.parent, exp.Dot): 6226 parent.replace(dot_or_id) 6227 break 6228 parent = parent.parent 6229 else: 6230 if column is node: 6231 node = dot_or_id 6232 else: 6233 column.replace(dot_or_id) 6234 return node 6235 6236 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6237 start = self._prev 6238 6239 # Not to be confused with TRUNCATE(number, decimals) function call 6240 if self._match(TokenType.L_PAREN): 6241 self._retreat(self._index - 2) 6242 return self._parse_function() 6243 6244 # Clickhouse supports TRUNCATE DATABASE as well 6245 is_database = self._match(TokenType.DATABASE) 6246 6247 self._match(TokenType.TABLE) 6248 6249 exists = self._parse_exists(not_=False) 6250 6251 expressions = self._parse_csv( 6252 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6253 ) 6254 6255 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6256 6257 if self._match_text_seq("RESTART", "IDENTITY"): 6258 identity = "RESTART" 6259 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6260 identity = "CONTINUE" 6261 else: 6262 identity = None 6263 6264 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6265 option = self._prev.text 6266 else: 6267 option = None 6268 6269 partition = self._parse_partition() 6270 6271 # Fallback case 6272 if self._curr: 6273 return self._parse_as_command(start) 6274 6275 return self.expression( 6276 exp.TruncateTable, 6277 expressions=expressions, 6278 is_database=is_database, 6279 exists=exists, 6280 cluster=cluster, 6281 identity=identity, 6282 option=option, 6283 partition=partition, 6284 ) 6285 6286 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6287 this = self._parse_ordered(self._parse_opclass) 6288 6289 if not self._match(TokenType.WITH): 6290 return this 6291 6292 op = self._parse_var(any_token=True) 6293 6294 return self.expression(exp.WithOperator, this=this, op=op)
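# _find_parser above drives the multi-word keyword dispatch (e.g. the
# SHOW_TRIE and SET_TRIE built by the _Parser metaclass) by walking a trie
# one token at a time, retreating on failure. A minimal standalone sketch of
# that lookup, using the same sqlglot.trie helpers:

from sqlglot.trie import TrieResult, in_trie, new_trie

trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))

result, node = in_trie(trie, ["SHOW"])
print(result == TrieResult.PREFIX)  # True: "SHOW" alone is only a prefix

result, node = in_trie(node, ["TABLES"])
print(result == TrieResult.EXISTS)  # True: "SHOW TABLES" is a complete key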
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
52def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 53 # Default argument order is base, expression 54 this = seq_get(args, 0) 55 expression = seq_get(args, 1) 56 57 if expression: 58 if not dialect.LOG_BASE_FIRST: 59 this, expression = expression, this 60 return exp.Log(this=this, expression=expression) 61 62 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
65def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 66 def _builder(args: t.List, dialect: Dialect) -> E: 67 expression = expr_type( 68 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 69 ) 70 if len(args) > 2 and expr_type is exp.JSONExtract: 71 expression.set("expressions", args[2:]) 72 73 return expression 74 75 return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 
204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } - {TokenType.IDENTIFIER} 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 } 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IDENTIFIER, 349 TokenType.IS, 350 TokenType.ISNULL, 351 TokenType.INTERVAL, 352 TokenType.KEEP, 353 TokenType.KILL, 354 TokenType.LEFT, 355 
TokenType.LOAD, 356 TokenType.MERGE, 357 TokenType.NATURAL, 358 TokenType.NEXT, 359 TokenType.OFFSET, 360 TokenType.OPERATOR, 361 TokenType.ORDINALITY, 362 TokenType.OVERLAPS, 363 TokenType.OVERWRITE, 364 TokenType.PARTITION, 365 TokenType.PERCENT, 366 TokenType.PIVOT, 367 TokenType.PRAGMA, 368 TokenType.RANGE, 369 TokenType.RECURSIVE, 370 TokenType.REFERENCES, 371 TokenType.REFRESH, 372 TokenType.REPLACE, 373 TokenType.RIGHT, 374 TokenType.ROW, 375 TokenType.ROWS, 376 TokenType.SEMI, 377 TokenType.SET, 378 TokenType.SETTINGS, 379 TokenType.SHOW, 380 TokenType.TEMPORARY, 381 TokenType.TOP, 382 TokenType.TRUE, 383 TokenType.TRUNCATE, 384 TokenType.UNIQUE, 385 TokenType.UNPIVOT, 386 TokenType.UPDATE, 387 TokenType.USE, 388 TokenType.VOLATILE, 389 TokenType.WINDOW, 390 *CREATABLES, 391 *SUBQUERY_PREDICATES, 392 *TYPE_TOKENS, 393 *NO_PAREN_FUNCTIONS, 394 } 395 396 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 397 398 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 399 TokenType.ANTI, 400 TokenType.APPLY, 401 TokenType.ASOF, 402 TokenType.FULL, 403 TokenType.LEFT, 404 TokenType.LOCK, 405 TokenType.NATURAL, 406 TokenType.OFFSET, 407 TokenType.RIGHT, 408 TokenType.SEMI, 409 TokenType.WINDOW, 410 } 411 412 ALIAS_TOKENS = ID_VAR_TOKENS 413 414 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 415 416 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 417 418 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 419 420 FUNC_TOKENS = { 421 TokenType.COLLATE, 422 TokenType.COMMAND, 423 TokenType.CURRENT_DATE, 424 TokenType.CURRENT_DATETIME, 425 TokenType.CURRENT_TIMESTAMP, 426 TokenType.CURRENT_TIME, 427 TokenType.CURRENT_USER, 428 TokenType.FILTER, 429 TokenType.FIRST, 430 TokenType.FORMAT, 431 TokenType.GLOB, 432 TokenType.IDENTIFIER, 433 TokenType.INDEX, 434 TokenType.ISNULL, 435 TokenType.ILIKE, 436 TokenType.INSERT, 437 TokenType.LIKE, 438 TokenType.MERGE, 439 TokenType.OFFSET, 440 TokenType.PRIMARY_KEY, 441 TokenType.RANGE, 442 TokenType.REPLACE, 443 TokenType.RLIKE, 444 TokenType.ROW, 445 TokenType.UNNEST, 446 TokenType.VAR, 447 TokenType.LEFT, 448 TokenType.RIGHT, 449 TokenType.SEQUENCE, 450 TokenType.DATE, 451 TokenType.DATETIME, 452 TokenType.TABLE, 453 TokenType.TIMESTAMP, 454 TokenType.TIMESTAMPTZ, 455 TokenType.TRUNCATE, 456 TokenType.WINDOW, 457 TokenType.XOR, 458 *TYPE_TOKENS, 459 *SUBQUERY_PREDICATES, 460 } 461 462 CONJUNCTION = { 463 TokenType.AND: exp.And, 464 TokenType.OR: exp.Or, 465 } 466 467 EQUALITY = { 468 TokenType.COLON_EQ: exp.PropertyEQ, 469 TokenType.EQ: exp.EQ, 470 TokenType.NEQ: exp.NEQ, 471 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 472 } 473 474 COMPARISON = { 475 TokenType.GT: exp.GT, 476 TokenType.GTE: exp.GTE, 477 TokenType.LT: exp.LT, 478 TokenType.LTE: exp.LTE, 479 } 480 481 BITWISE = { 482 TokenType.AMP: exp.BitwiseAnd, 483 TokenType.CARET: exp.BitwiseXor, 484 TokenType.PIPE: exp.BitwiseOr, 485 } 486 487 TERM = { 488 TokenType.DASH: exp.Sub, 489 TokenType.PLUS: exp.Add, 490 TokenType.MOD: exp.Mod, 491 TokenType.COLLATE: exp.Collate, 492 } 493 494 FACTOR = { 495 TokenType.DIV: exp.IntDiv, 496 TokenType.LR_ARROW: exp.Distance, 497 TokenType.SLASH: exp.Div, 498 TokenType.STAR: exp.Mul, 499 } 500 501 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 502 503 TIMES = { 504 TokenType.TIME, 505 TokenType.TIMETZ, 506 } 507 508 TIMESTAMPS = { 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 TokenType.TIMESTAMPLTZ, 512 *TIMES, 513 } 514 515 SET_OPERATIONS = { 516 TokenType.UNION, 517 TokenType.INTERSECT, 518 TokenType.EXCEPT, 519 } 520 521 JOIN_METHODS = { 
522 TokenType.ASOF, 523 TokenType.NATURAL, 524 TokenType.POSITIONAL, 525 } 526 527 JOIN_SIDES = { 528 TokenType.LEFT, 529 TokenType.RIGHT, 530 TokenType.FULL, 531 } 532 533 JOIN_KINDS = { 534 TokenType.INNER, 535 TokenType.OUTER, 536 TokenType.CROSS, 537 TokenType.SEMI, 538 TokenType.ANTI, 539 } 540 541 JOIN_HINTS: t.Set[str] = set() 542 543 LAMBDAS = { 544 TokenType.ARROW: lambda self, expressions: self.expression( 545 exp.Lambda, 546 this=self._replace_lambda( 547 self._parse_conjunction(), 548 {node.name for node in expressions}, 549 ), 550 expressions=expressions, 551 ), 552 TokenType.FARROW: lambda self, expressions: self.expression( 553 exp.Kwarg, 554 this=exp.var(expressions[0].name), 555 expression=self._parse_conjunction(), 556 ), 557 } 558 559 COLUMN_OPERATORS = { 560 TokenType.DOT: None, 561 TokenType.DCOLON: lambda self, this, to: self.expression( 562 exp.Cast if self.STRICT_CAST else exp.TryCast, 563 this=this, 564 to=to, 565 ), 566 TokenType.ARROW: lambda self, this, path: self.expression( 567 exp.JSONExtract, 568 this=this, 569 expression=self.dialect.to_json_path(path), 570 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 571 ), 572 TokenType.DARROW: lambda self, this, path: self.expression( 573 exp.JSONExtractScalar, 574 this=this, 575 expression=self.dialect.to_json_path(path), 576 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 577 ), 578 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 579 exp.JSONBExtract, 580 this=this, 581 expression=path, 582 ), 583 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 584 exp.JSONBExtractScalar, 585 this=this, 586 expression=path, 587 ), 588 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 589 exp.JSONBContains, 590 this=this, 591 expression=key, 592 ), 593 } 594 595 EXPRESSION_PARSERS = { 596 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 597 exp.Column: lambda self: self._parse_column(), 598 exp.Condition: lambda self: self._parse_conjunction(), 599 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 600 exp.Expression: lambda self: self._parse_expression(), 601 exp.From: lambda self: self._parse_from(), 602 exp.Group: lambda self: self._parse_group(), 603 exp.Having: lambda self: self._parse_having(), 604 exp.Identifier: lambda self: self._parse_id_var(), 605 exp.Join: lambda self: self._parse_join(), 606 exp.Lambda: lambda self: self._parse_lambda(), 607 exp.Lateral: lambda self: self._parse_lateral(), 608 exp.Limit: lambda self: self._parse_limit(), 609 exp.Offset: lambda self: self._parse_offset(), 610 exp.Order: lambda self: self._parse_order(), 611 exp.Ordered: lambda self: self._parse_ordered(), 612 exp.Properties: lambda self: self._parse_properties(), 613 exp.Qualify: lambda self: self._parse_qualify(), 614 exp.Returning: lambda self: self._parse_returning(), 615 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 616 exp.Table: lambda self: self._parse_table_parts(), 617 exp.TableAlias: lambda self: self._parse_table_alias(), 618 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 619 exp.Where: lambda self: self._parse_where(), 620 exp.Window: lambda self: self._parse_named_window(), 621 exp.With: lambda self: self._parse_with(), 622 "JOIN_TYPE": lambda self: self._parse_join_parts(), 623 } 624 625 STATEMENT_PARSERS = { 626 TokenType.ALTER: lambda self: self._parse_alter(), 627 TokenType.BEGIN: lambda self: self._parse_transaction(), 628 TokenType.CACHE: lambda self: self._parse_cache(), 629 
TokenType.COMMENT: lambda self: self._parse_comment(), 630 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 631 TokenType.CREATE: lambda self: self._parse_create(), 632 TokenType.DELETE: lambda self: self._parse_delete(), 633 TokenType.DESC: lambda self: self._parse_describe(), 634 TokenType.DESCRIBE: lambda self: self._parse_describe(), 635 TokenType.DROP: lambda self: self._parse_drop(), 636 TokenType.INSERT: lambda self: self._parse_insert(), 637 TokenType.KILL: lambda self: self._parse_kill(), 638 TokenType.LOAD: lambda self: self._parse_load(), 639 TokenType.MERGE: lambda self: self._parse_merge(), 640 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 641 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 642 TokenType.REFRESH: lambda self: self._parse_refresh(), 643 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 644 TokenType.SET: lambda self: self._parse_set(), 645 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 646 TokenType.UNCACHE: lambda self: self._parse_uncache(), 647 TokenType.UPDATE: lambda self: self._parse_update(), 648 TokenType.USE: lambda self: self.expression( 649 exp.Use, 650 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 651 this=self._parse_table(schema=False), 652 ), 653 } 654 655 UNARY_PARSERS = { 656 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 657 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 658 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 659 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 660 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 661 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 662 } 663 664 STRING_PARSERS = { 665 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 666 exp.RawString, this=token.text 667 ), 668 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 669 exp.National, this=token.text 670 ), 671 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 672 TokenType.STRING: lambda self, token: self.expression( 673 exp.Literal, this=token.text, is_string=True 674 ), 675 TokenType.UNICODE_STRING: lambda self, token: self.expression( 676 exp.UnicodeString, 677 this=token.text, 678 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 679 ), 680 } 681 682 NUMERIC_PARSERS = { 683 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 684 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 685 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 686 TokenType.NUMBER: lambda self, token: self.expression( 687 exp.Literal, this=token.text, is_string=False 688 ), 689 } 690 691 PRIMARY_PARSERS = { 692 **STRING_PARSERS, 693 **NUMERIC_PARSERS, 694 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 695 TokenType.NULL: lambda self, _: self.expression(exp.Null), 696 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 697 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 698 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 699 TokenType.STAR: lambda self, _: self.expression( 700 exp.Star, **{"except": self._parse_except(), 
"replace": self._parse_replace()} 701 ), 702 } 703 704 PLACEHOLDER_PARSERS = { 705 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 706 TokenType.PARAMETER: lambda self: self._parse_parameter(), 707 TokenType.COLON: lambda self: ( 708 self.expression(exp.Placeholder, this=self._prev.text) 709 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 710 else None 711 ), 712 } 713 714 RANGE_PARSERS = { 715 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 716 TokenType.GLOB: binary_range_parser(exp.Glob), 717 TokenType.ILIKE: binary_range_parser(exp.ILike), 718 TokenType.IN: lambda self, this: self._parse_in(this), 719 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 720 TokenType.IS: lambda self, this: self._parse_is(this), 721 TokenType.LIKE: binary_range_parser(exp.Like), 722 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 723 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 724 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 725 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 726 } 727 728 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 729 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 730 "AUTO": lambda self: self._parse_auto_property(), 731 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 732 "BACKUP": lambda self: self.expression( 733 exp.BackupProperty, this=self._parse_var(any_token=True) 734 ), 735 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 736 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 738 "CHECKSUM": lambda self: self._parse_checksum(), 739 "CLUSTER BY": lambda self: self._parse_cluster(), 740 "CLUSTERED": lambda self: self._parse_clustered_by(), 741 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 742 exp.CollateProperty, **kwargs 743 ), 744 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 745 "CONTAINS": lambda self: self._parse_contains_property(), 746 "COPY": lambda self: self._parse_copy_property(), 747 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 748 "DEFINER": lambda self: self._parse_definer(), 749 "DETERMINISTIC": lambda self: self.expression( 750 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 751 ), 752 "DISTKEY": lambda self: self._parse_distkey(), 753 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 754 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 755 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 756 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 757 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 758 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 759 "FREESPACE": lambda self: self._parse_freespace(), 760 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 761 "HEAP": lambda self: self.expression(exp.HeapProperty), 762 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 763 "IMMUTABLE": lambda self: self.expression( 764 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 765 ), 766 "INHERITS": lambda self: self.expression( 767 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 768 ), 769 "INPUT": lambda self: self.expression(exp.InputModelProperty, 
this=self._parse_schema()), 770 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 771 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 772 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 773 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 774 "LIKE": lambda self: self._parse_create_like(), 775 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 776 "LOCK": lambda self: self._parse_locking(), 777 "LOCKING": lambda self: self._parse_locking(), 778 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 779 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 780 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 781 "MODIFIES": lambda self: self._parse_modifies_property(), 782 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 783 "NO": lambda self: self._parse_no_property(), 784 "ON": lambda self: self._parse_on_property(), 785 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 786 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 787 "PARTITION": lambda self: self._parse_partitioned_of(), 788 "PARTITION BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 790 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 791 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 792 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 793 "READS": lambda self: self._parse_reads_property(), 794 "REMOTE": lambda self: self._parse_remote_with_connection(), 795 "RETURNS": lambda self: self._parse_returns(), 796 "ROW": lambda self: self._parse_row(), 797 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 798 "SAMPLE": lambda self: self.expression( 799 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 800 ), 801 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 802 "SETTINGS": lambda self: self.expression( 803 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 804 ), 805 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 806 "SORTKEY": lambda self: self._parse_sortkey(), 807 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 808 "STABLE": lambda self: self.expression( 809 exp.StabilityProperty, this=exp.Literal.string("STABLE") 810 ), 811 "STORED": lambda self: self._parse_stored(), 812 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 813 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 814 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 815 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 816 "TO": lambda self: self._parse_to_table(), 817 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 818 "TRANSFORM": lambda self: self.expression( 819 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 820 ), 821 "TTL": lambda self: self._parse_ttl(), 822 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 823 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 824 "VOLATILE": lambda self: self._parse_volatile_property(), 825 "WITH": lambda self: self._parse_with_property(), 826 } 827 828 CONSTRAINT_PARSERS = { 829 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 830 "AUTO_INCREMENT": lambda 
self: self._parse_auto_increment(), 831 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 832 "CHARACTER SET": lambda self: self.expression( 833 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 834 ), 835 "CHECK": lambda self: self.expression( 836 exp.CheckColumnConstraint, 837 this=self._parse_wrapped(self._parse_conjunction), 838 enforced=self._match_text_seq("ENFORCED"), 839 ), 840 "COLLATE": lambda self: self.expression( 841 exp.CollateColumnConstraint, this=self._parse_var() 842 ), 843 "COMMENT": lambda self: self.expression( 844 exp.CommentColumnConstraint, this=self._parse_string() 845 ), 846 "COMPRESS": lambda self: self._parse_compress(), 847 "CLUSTERED": lambda self: self.expression( 848 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 849 ), 850 "NONCLUSTERED": lambda self: self.expression( 851 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 852 ), 853 "DEFAULT": lambda self: self.expression( 854 exp.DefaultColumnConstraint, this=self._parse_bitwise() 855 ), 856 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 857 "EPHEMERAL": lambda self: self.expression( 858 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 859 ), 860 "EXCLUDE": lambda self: self.expression( 861 exp.ExcludeColumnConstraint, this=self._parse_index_params() 862 ), 863 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 864 "FORMAT": lambda self: self.expression( 865 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 866 ), 867 "GENERATED": lambda self: self._parse_generated_as_identity(), 868 "IDENTITY": lambda self: self._parse_auto_increment(), 869 "INLINE": lambda self: self._parse_inline(), 870 "LIKE": lambda self: self._parse_create_like(), 871 "NOT": lambda self: self._parse_not_constraint(), 872 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 873 "ON": lambda self: ( 874 self._match(TokenType.UPDATE) 875 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 876 ) 877 or self.expression(exp.OnProperty, this=self._parse_id_var()), 878 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 879 "PERIOD": lambda self: self._parse_period_for_system_time(), 880 "PRIMARY KEY": lambda self: self._parse_primary_key(), 881 "REFERENCES": lambda self: self._parse_references(match=False), 882 "TITLE": lambda self: self.expression( 883 exp.TitleColumnConstraint, this=self._parse_var_or_string() 884 ), 885 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 886 "UNIQUE": lambda self: self._parse_unique(), 887 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 888 "WITH": lambda self: self.expression( 889 exp.Properties, expressions=self._parse_wrapped_properties() 890 ), 891 } 892 893 ALTER_PARSERS = { 894 "ADD": lambda self: self._parse_alter_table_add(), 895 "ALTER": lambda self: self._parse_alter_table_alter(), 896 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 897 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 898 "DROP": lambda self: self._parse_alter_table_drop(), 899 "RENAME": lambda self: self._parse_alter_table_rename(), 900 } 901 902 SCHEMA_UNNAMED_CONSTRAINTS = { 903 "CHECK", 904 "EXCLUDE", 905 "FOREIGN KEY", 906 "LIKE", 907 "PERIOD", 908 "PRIMARY KEY", 909 "UNIQUE", 910 } 911 912 NO_PAREN_FUNCTION_PARSERS = { 913 
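# The entries below handle keyword-led constructs that are parsed without a
# parenthesized argument list (unlike FUNCTION_PARSERS further down). As an
# illustrative sketch, not part of the original source, a CASE expression is
# dispatched to _parse_case() through this table:
#
#   import sqlglot
#   from sqlglot import exp
#
#   node = sqlglot.parse_one("SELECT CASE WHEN a > 0 THEN 1 ELSE 0 END")
#   assert isinstance(node.selects[0], exp.Case)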
"ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 914 "CASE": lambda self: self._parse_case(), 915 "IF": lambda self: self._parse_if(), 916 "NEXT": lambda self: self._parse_next_value_for(), 917 } 918 919 INVALID_FUNC_NAME_TOKENS = { 920 TokenType.IDENTIFIER, 921 TokenType.STRING, 922 } 923 924 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 925 926 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 927 928 FUNCTION_PARSERS = { 929 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 930 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 931 "DECODE": lambda self: self._parse_decode(), 932 "EXTRACT": lambda self: self._parse_extract(), 933 "JSON_OBJECT": lambda self: self._parse_json_object(), 934 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 935 "JSON_TABLE": lambda self: self._parse_json_table(), 936 "MATCH": lambda self: self._parse_match_against(), 937 "OPENJSON": lambda self: self._parse_open_json(), 938 "POSITION": lambda self: self._parse_position(), 939 "PREDICT": lambda self: self._parse_predict(), 940 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 941 "STRING_AGG": lambda self: self._parse_string_agg(), 942 "SUBSTRING": lambda self: self._parse_substring(), 943 "TRIM": lambda self: self._parse_trim(), 944 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 945 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 946 } 947 948 QUERY_MODIFIER_PARSERS = { 949 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 950 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 951 TokenType.WHERE: lambda self: ("where", self._parse_where()), 952 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 953 TokenType.HAVING: lambda self: ("having", self._parse_having()), 954 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 955 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 956 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 957 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 958 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 959 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 960 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 961 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 962 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 963 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 964 TokenType.CLUSTER_BY: lambda self: ( 965 "cluster", 966 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 967 ), 968 TokenType.DISTRIBUTE_BY: lambda self: ( 969 "distribute", 970 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 971 ), 972 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 973 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 974 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 975 } 976 977 SET_PARSERS = { 978 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 979 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 980 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 981 "TRANSACTION": lambda self: self._parse_set_transaction(), 982 } 983 984 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 985 986 TYPE_LITERAL_PARSERS = { 987 exp.DataType.Type.JSON: lambda self, this, _: 
self.expression(exp.ParseJSON, this=this),
988 }
989
990 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
991
992 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
993
994 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
995 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
996 "ISOLATION": (
997 ("LEVEL", "REPEATABLE", "READ"),
998 ("LEVEL", "READ", "COMMITTED"),
999 ("LEVEL", "READ", "UNCOMMITTED"),
1000 ("LEVEL", "SERIALIZABLE"),
1001 ),
1002 "READ": ("WRITE", "ONLY"),
1003 }
1004
1005 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
1006 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
1007 )
1008 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
1009
1010 CREATE_SEQUENCE: OPTIONS_TYPE = {
1011 "SCALE": ("EXTEND", "NOEXTEND"),
1012 "SHARD": ("EXTEND", "NOEXTEND"),
1013 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
1014 **dict.fromkeys(
1015 (
1016 "SESSION",
1017 "GLOBAL",
1018 "KEEP",
1019 "NOKEEP",
1020 "ORDER",
1021 "NOORDER",
1022 "NOCACHE",
1023 "CYCLE",
1024 "NOCYCLE",
1025 "NOMINVALUE",
1026 "NOMAXVALUE",
1027 "NOSCALE",
1028 "NOSHARD",
1029 ),
1030 tuple(),
1031 ),
1032 }
1033
1034 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}
1035
1036 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())
1037
1038 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))
1039
1040 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
1041
1042 CLONE_KEYWORDS = {"CLONE", "COPY"}
1043 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}
1044
1045 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}
1046
1047 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
1048
1049 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}
1050
1051 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}
1052
1053 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
1054 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
1055 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
1056
1057 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}
1058
1059 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}
1060
1061 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
1062
1063 DISTINCT_TOKENS = {TokenType.DISTINCT}
1064
1065 NULL_TOKENS = {TokenType.NULL}
1066
1067 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS
1068
1069 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
1070
1071 STRICT_CAST = True
1072
1073 PREFIXED_PIVOT_COLUMNS = False
1074 IDENTIFY_PIVOT_STRINGS = False
1075
1076 LOG_DEFAULTS_TO_LN = False
1077
1078 # Whether ADD is present for each column added by ALTER TABLE
1079 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True
1080
1081 # Whether the table sample clause expects CSV syntax
1082 TABLESAMPLE_CSV = False
1083
1084 # Whether the SET command needs a delimiter (e.g.
"=") for assignments 1085 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1086 1087 # Whether the TRIM function expects the characters to trim as its first argument 1088 TRIM_PATTERN_FIRST = False 1089 1090 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1091 STRING_ALIASES = False 1092 1093 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1094 MODIFIERS_ATTACHED_TO_UNION = True 1095 UNION_MODIFIERS = {"order", "limit", "offset"} 1096 1097 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1098 NO_PAREN_IF_COMMANDS = True 1099 1100 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1101 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1102 1103 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1104 # If this is True and '(' is not found, the keyword will be treated as an identifier 1105 VALUES_FOLLOWED_BY_PAREN = True 1106 1107 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1108 SUPPORTS_IMPLICIT_UNNEST = False 1109 1110 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1111 INTERVAL_SPANS = True 1112 1113 # Whether a PARTITION clause can follow a table reference 1114 SUPPORTS_PARTITION_SELECTION = False 1115 1116 __slots__ = ( 1117 "error_level", 1118 "error_message_context", 1119 "max_errors", 1120 "dialect", 1121 "sql", 1122 "errors", 1123 "_tokens", 1124 "_index", 1125 "_curr", 1126 "_next", 1127 "_prev", 1128 "_prev_comments", 1129 ) 1130 1131 # Autofilled 1132 SHOW_TRIE: t.Dict = {} 1133 SET_TRIE: t.Dict = {} 1134 1135 def __init__( 1136 self, 1137 error_level: t.Optional[ErrorLevel] = None, 1138 error_message_context: int = 100, 1139 max_errors: int = 3, 1140 dialect: DialectType = None, 1141 ): 1142 from sqlglot.dialects import Dialect 1143 1144 self.error_level = error_level or ErrorLevel.IMMEDIATE 1145 self.error_message_context = error_message_context 1146 self.max_errors = max_errors 1147 self.dialect = Dialect.get_or_raise(dialect) 1148 self.reset() 1149 1150 def reset(self): 1151 self.sql = "" 1152 self.errors = [] 1153 self._tokens = [] 1154 self._index = 0 1155 self._curr = None 1156 self._next = None 1157 self._prev = None 1158 self._prev_comments = None 1159 1160 def parse( 1161 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1162 ) -> t.List[t.Optional[exp.Expression]]: 1163 """ 1164 Parses a list of tokens and returns a list of syntax trees, one tree 1165 per parsed SQL statement. 1166 1167 Args: 1168 raw_tokens: The list of tokens. 1169 sql: The original SQL string, used to produce helpful debug messages. 1170 1171 Returns: 1172 The list of the produced syntax trees. 1173 """ 1174 return self._parse( 1175 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1176 ) 1177 1178 def parse_into( 1179 self, 1180 expression_types: exp.IntoType, 1181 raw_tokens: t.List[Token], 1182 sql: t.Optional[str] = None, 1183 ) -> t.List[t.Optional[exp.Expression]]: 1184 """ 1185 Parses a list of tokens into a given Expression type. If a collection of Expression 1186 types is given instead, this method will try to parse the token list into each one 1187 of them, stopping at the first for which the parsing succeeds. 1188 1189 Args: 1190 expression_types: The expression type(s) to try and parse the token list into. 1191 raw_tokens: The list of tokens. 1192 sql: The original SQL string, used to produce helpful debug messages. 
1193 1194 Returns: 1195 The target Expression. 1196 """ 1197 errors = [] 1198 for expression_type in ensure_list(expression_types): 1199 parser = self.EXPRESSION_PARSERS.get(expression_type) 1200 if not parser: 1201 raise TypeError(f"No parser registered for {expression_type}") 1202 1203 try: 1204 return self._parse(parser, raw_tokens, sql) 1205 except ParseError as e: 1206 e.errors[0]["into_expression"] = expression_type 1207 errors.append(e) 1208 1209 raise ParseError( 1210 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1211 errors=merge_errors(errors), 1212 ) from errors[-1] 1213 1214 def _parse( 1215 self, 1216 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1217 raw_tokens: t.List[Token], 1218 sql: t.Optional[str] = None, 1219 ) -> t.List[t.Optional[exp.Expression]]: 1220 self.reset() 1221 self.sql = sql or "" 1222 1223 total = len(raw_tokens) 1224 chunks: t.List[t.List[Token]] = [[]] 1225 1226 for i, token in enumerate(raw_tokens): 1227 if token.token_type == TokenType.SEMICOLON: 1228 if i < total - 1: 1229 chunks.append([]) 1230 else: 1231 chunks[-1].append(token) 1232 1233 expressions = [] 1234 1235 for tokens in chunks: 1236 self._index = -1 1237 self._tokens = tokens 1238 self._advance() 1239 1240 expressions.append(parse_method(self)) 1241 1242 if self._index < len(self._tokens): 1243 self.raise_error("Invalid expression / Unexpected token") 1244 1245 self.check_errors() 1246 1247 return expressions 1248 1249 def check_errors(self) -> None: 1250 """Logs or raises any found errors, depending on the chosen error level setting.""" 1251 if self.error_level == ErrorLevel.WARN: 1252 for error in self.errors: 1253 logger.error(str(error)) 1254 elif self.error_level == ErrorLevel.RAISE and self.errors: 1255 raise ParseError( 1256 concat_messages(self.errors, self.max_errors), 1257 errors=merge_errors(self.errors), 1258 ) 1259 1260 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1261 """ 1262 Appends an error in the list of recorded errors or raises it, depending on the chosen 1263 error level setting. 1264 """ 1265 token = token or self._curr or self._prev or Token.string("") 1266 start = token.start 1267 end = token.end + 1 1268 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1269 highlight = self.sql[start:end] 1270 end_context = self.sql[end : end + self.error_message_context] 1271 1272 error = ParseError.new( 1273 f"{message}. Line {token.line}, Col: {token.col}.\n" 1274 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1275 description=message, 1276 line=token.line, 1277 col=token.col, 1278 start_context=start_context, 1279 highlight=highlight, 1280 end_context=end_context, 1281 ) 1282 1283 if self.error_level == ErrorLevel.IMMEDIATE: 1284 raise error 1285 1286 self.errors.append(error) 1287 1288 def expression( 1289 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1290 ) -> E: 1291 """ 1292 Creates a new, validated Expression. 1293 1294 Args: 1295 exp_class: The expression class to instantiate. 1296 comments: An optional list of comments to attach to the expression. 1297 kwargs: The arguments to set for the expression along with their respective values. 1298 1299 Returns: 1300 The target expression. 
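        Example (an illustrative sketch; with the default ErrorLevel.IMMEDIATE,
        omitting a mandatory argument would raise a ParseError instead):
            >>> from sqlglot import exp
            >>> from sqlglot.parser import Parser
            >>> Parser().expression(exp.Not, this=exp.column("x")).sql()
            'NOT x'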
1301 """ 1302 instance = exp_class(**kwargs) 1303 instance.add_comments(comments) if comments else self._add_comments(instance) 1304 return self.validate_expression(instance) 1305 1306 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1307 if expression and self._prev_comments: 1308 expression.add_comments(self._prev_comments) 1309 self._prev_comments = None 1310 1311 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1312 """ 1313 Validates an Expression, making sure that all its mandatory arguments are set. 1314 1315 Args: 1316 expression: The expression to validate. 1317 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1318 1319 Returns: 1320 The validated expression. 1321 """ 1322 if self.error_level != ErrorLevel.IGNORE: 1323 for error_message in expression.error_messages(args): 1324 self.raise_error(error_message) 1325 1326 return expression 1327 1328 def _find_sql(self, start: Token, end: Token) -> str: 1329 return self.sql[start.start : end.end + 1] 1330 1331 def _is_connected(self) -> bool: 1332 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1333 1334 def _advance(self, times: int = 1) -> None: 1335 self._index += times 1336 self._curr = seq_get(self._tokens, self._index) 1337 self._next = seq_get(self._tokens, self._index + 1) 1338 1339 if self._index > 0: 1340 self._prev = self._tokens[self._index - 1] 1341 self._prev_comments = self._prev.comments 1342 else: 1343 self._prev = None 1344 self._prev_comments = None 1345 1346 def _retreat(self, index: int) -> None: 1347 if index != self._index: 1348 self._advance(index - self._index) 1349 1350 def _warn_unsupported(self) -> None: 1351 if len(self._tokens) <= 1: 1352 return 1353 1354 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1355 # interested in emitting a warning for the one being currently processed. 1356 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1357 1358 logger.warning( 1359 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1360 ) 1361 1362 def _parse_command(self) -> exp.Command: 1363 self._warn_unsupported() 1364 return self.expression( 1365 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1366 ) 1367 1368 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1369 """ 1370 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1371 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1372 the parser state accordingly 1373 """ 1374 index = self._index 1375 error_level = self.error_level 1376 1377 self.error_level = ErrorLevel.IMMEDIATE 1378 try: 1379 this = parse_method() 1380 except ParseError: 1381 this = None 1382 finally: 1383 if not this or retreat: 1384 self._retreat(index) 1385 self.error_level = error_level 1386 1387 return this 1388 1389 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1390 start = self._prev 1391 exists = self._parse_exists() if allow_exists else None 1392 1393 self._match(TokenType.ON) 1394 1395 materialized = self._match_text_seq("MATERIALIZED") 1396 kind = self._match_set(self.CREATABLES) and self._prev 1397 if not kind: 1398 return self._parse_as_command(start) 1399 1400 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1401 this = self._parse_user_defined_function(kind=kind.token_type) 1402 elif kind.token_type == TokenType.TABLE: 1403 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1404 elif kind.token_type == TokenType.COLUMN: 1405 this = self._parse_column() 1406 else: 1407 this = self._parse_id_var() 1408 1409 self._match(TokenType.IS) 1410 1411 return self.expression( 1412 exp.Comment, 1413 this=this, 1414 kind=kind.text, 1415 expression=self._parse_string(), 1416 exists=exists, 1417 materialized=materialized, 1418 ) 1419 1420 def _parse_to_table( 1421 self, 1422 ) -> exp.ToTableProperty: 1423 table = self._parse_table_parts(schema=True) 1424 return self.expression(exp.ToTableProperty, this=table) 1425 1426 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1427 def _parse_ttl(self) -> exp.Expression: 1428 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1429 this = self._parse_bitwise() 1430 1431 if self._match_text_seq("DELETE"): 1432 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1433 if self._match_text_seq("RECOMPRESS"): 1434 return self.expression( 1435 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1436 ) 1437 if self._match_text_seq("TO", "DISK"): 1438 return self.expression( 1439 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1440 ) 1441 if self._match_text_seq("TO", "VOLUME"): 1442 return self.expression( 1443 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1444 ) 1445 1446 return this 1447 1448 expressions = self._parse_csv(_parse_ttl_action) 1449 where = self._parse_where() 1450 group = self._parse_group() 1451 1452 aggregates = None 1453 if group and self._match(TokenType.SET): 1454 aggregates = self._parse_csv(self._parse_set_item) 1455 1456 return self.expression( 1457 exp.MergeTreeTTL, 1458 expressions=expressions, 1459 where=where, 1460 group=group, 1461 aggregates=aggregates, 1462 ) 1463 1464 def _parse_statement(self) -> t.Optional[exp.Expression]: 1465 if self._curr is None: 1466 return None 1467 1468 if self._match_set(self.STATEMENT_PARSERS): 1469 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1470 1471 if self._match_set(Tokenizer.COMMANDS): 1472 return self._parse_command() 1473 1474 expression = self._parse_expression() 1475 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1476 return self._parse_query_modifiers(expression) 1477 1478 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1479 start = self._prev 1480 
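# The optional DROP modifiers are consumed positionally below: TEMPORARY and
# MATERIALIZED first, then the object kind (one of CREATABLES). If no known
# kind follows, the statement is preserved verbatim as a generic exp.Command.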
temporary = self._match(TokenType.TEMPORARY) 1481 materialized = self._match_text_seq("MATERIALIZED") 1482 1483 kind = self._match_set(self.CREATABLES) and self._prev.text 1484 if not kind: 1485 return self._parse_as_command(start) 1486 1487 if_exists = exists or self._parse_exists() 1488 table = self._parse_table_parts( 1489 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1490 ) 1491 1492 if self._match(TokenType.L_PAREN, advance=False): 1493 expressions = self._parse_wrapped_csv(self._parse_types) 1494 else: 1495 expressions = None 1496 1497 return self.expression( 1498 exp.Drop, 1499 comments=start.comments, 1500 exists=if_exists, 1501 this=table, 1502 expressions=expressions, 1503 kind=kind, 1504 temporary=temporary, 1505 materialized=materialized, 1506 cascade=self._match_text_seq("CASCADE"), 1507 constraints=self._match_text_seq("CONSTRAINTS"), 1508 purge=self._match_text_seq("PURGE"), 1509 ) 1510 1511 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1512 return ( 1513 self._match_text_seq("IF") 1514 and (not not_ or self._match(TokenType.NOT)) 1515 and self._match(TokenType.EXISTS) 1516 ) 1517 1518 def _parse_create(self) -> exp.Create | exp.Command: 1519 # Note: this can't be None because we've matched a statement parser 1520 start = self._prev 1521 comments = self._prev_comments 1522 1523 replace = ( 1524 start.token_type == TokenType.REPLACE 1525 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1526 or self._match_pair(TokenType.OR, TokenType.ALTER) 1527 ) 1528 1529 unique = self._match(TokenType.UNIQUE) 1530 1531 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1532 self._advance() 1533 1534 properties = None 1535 create_token = self._match_set(self.CREATABLES) and self._prev 1536 1537 if not create_token: 1538 # exp.Properties.Location.POST_CREATE 1539 properties = self._parse_properties() 1540 create_token = self._match_set(self.CREATABLES) and self._prev 1541 1542 if not properties or not create_token: 1543 return self._parse_as_command(start) 1544 1545 exists = self._parse_exists(not_=True) 1546 this = None 1547 expression: t.Optional[exp.Expression] = None 1548 indexes = None 1549 no_schema_binding = None 1550 begin = None 1551 end = None 1552 clone = None 1553 1554 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1555 nonlocal properties 1556 if properties and temp_props: 1557 properties.expressions.extend(temp_props.expressions) 1558 elif temp_props: 1559 properties = temp_props 1560 1561 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1562 this = self._parse_user_defined_function(kind=create_token.token_type) 1563 1564 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1565 extend_props(self._parse_properties()) 1566 1567 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1568 1569 if not expression: 1570 if self._match(TokenType.COMMAND): 1571 expression = self._parse_as_command(self._prev) 1572 else: 1573 begin = self._match(TokenType.BEGIN) 1574 return_ = self._match_text_seq("RETURN") 1575 1576 if self._match(TokenType.STRING, advance=False): 1577 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1578 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1579 expression = self._parse_string() 1580 extend_props(self._parse_properties()) 1581 else: 1582 expression = self._parse_statement() 1583 1584 end = 
self._match_text_seq("END") 1585 1586 if return_: 1587 expression = self.expression(exp.Return, this=expression) 1588 elif create_token.token_type == TokenType.INDEX: 1589 this = self._parse_index(index=self._parse_id_var()) 1590 elif create_token.token_type in self.DB_CREATABLES: 1591 table_parts = self._parse_table_parts( 1592 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1593 ) 1594 1595 # exp.Properties.Location.POST_NAME 1596 self._match(TokenType.COMMA) 1597 extend_props(self._parse_properties(before=True)) 1598 1599 this = self._parse_schema(this=table_parts) 1600 1601 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1602 extend_props(self._parse_properties()) 1603 1604 self._match(TokenType.ALIAS) 1605 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1606 # exp.Properties.Location.POST_ALIAS 1607 extend_props(self._parse_properties()) 1608 1609 if create_token.token_type == TokenType.SEQUENCE: 1610 expression = self._parse_types() 1611 extend_props(self._parse_properties()) 1612 else: 1613 expression = self._parse_ddl_select() 1614 1615 if create_token.token_type == TokenType.TABLE: 1616 # exp.Properties.Location.POST_EXPRESSION 1617 extend_props(self._parse_properties()) 1618 1619 indexes = [] 1620 while True: 1621 index = self._parse_index() 1622 1623 # exp.Properties.Location.POST_INDEX 1624 extend_props(self._parse_properties()) 1625 1626 if not index: 1627 break 1628 else: 1629 self._match(TokenType.COMMA) 1630 indexes.append(index) 1631 elif create_token.token_type == TokenType.VIEW: 1632 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1633 no_schema_binding = True 1634 1635 shallow = self._match_text_seq("SHALLOW") 1636 1637 if self._match_texts(self.CLONE_KEYWORDS): 1638 copy = self._prev.text.lower() == "copy" 1639 clone = self.expression( 1640 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1641 ) 1642 1643 if self._curr: 1644 return self._parse_as_command(start) 1645 1646 return self.expression( 1647 exp.Create, 1648 comments=comments, 1649 this=this, 1650 kind=create_token.text.upper(), 1651 replace=replace, 1652 unique=unique, 1653 expression=expression, 1654 exists=exists, 1655 properties=properties, 1656 indexes=indexes, 1657 no_schema_binding=no_schema_binding, 1658 begin=begin, 1659 end=end, 1660 clone=clone, 1661 ) 1662 1663 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1664 seq = exp.SequenceProperties() 1665 1666 options = [] 1667 index = self._index 1668 1669 while self._curr: 1670 if self._match_text_seq("INCREMENT"): 1671 self._match_text_seq("BY") 1672 self._match_text_seq("=") 1673 seq.set("increment", self._parse_term()) 1674 elif self._match_text_seq("MINVALUE"): 1675 seq.set("minvalue", self._parse_term()) 1676 elif self._match_text_seq("MAXVALUE"): 1677 seq.set("maxvalue", self._parse_term()) 1678 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1679 self._match_text_seq("=") 1680 seq.set("start", self._parse_term()) 1681 elif self._match_text_seq("CACHE"): 1682 # T-SQL allows empty CACHE which is initialized dynamically 1683 seq.set("cache", self._parse_number() or True) 1684 elif self._match_text_seq("OWNED", "BY"): 1685 # "OWNED BY NONE" is the default 1686 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1687 else: 1688 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1689 if opt: 1690 options.append(opt) 1691 else: 1692 break 1693 1694 
seq.set("options", options if options else None) 1695 return None if self._index == index else seq 1696 1697 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1698 # only used for teradata currently 1699 self._match(TokenType.COMMA) 1700 1701 kwargs = { 1702 "no": self._match_text_seq("NO"), 1703 "dual": self._match_text_seq("DUAL"), 1704 "before": self._match_text_seq("BEFORE"), 1705 "default": self._match_text_seq("DEFAULT"), 1706 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1707 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1708 "after": self._match_text_seq("AFTER"), 1709 "minimum": self._match_texts(("MIN", "MINIMUM")), 1710 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1711 } 1712 1713 if self._match_texts(self.PROPERTY_PARSERS): 1714 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1715 try: 1716 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1717 except TypeError: 1718 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1719 1720 return None 1721 1722 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1723 return self._parse_wrapped_csv(self._parse_property) 1724 1725 def _parse_property(self) -> t.Optional[exp.Expression]: 1726 if self._match_texts(self.PROPERTY_PARSERS): 1727 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1728 1729 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1730 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1731 1732 if self._match_text_seq("COMPOUND", "SORTKEY"): 1733 return self._parse_sortkey(compound=True) 1734 1735 if self._match_text_seq("SQL", "SECURITY"): 1736 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1737 1738 index = self._index 1739 key = self._parse_column() 1740 1741 if not self._match(TokenType.EQ): 1742 self._retreat(index) 1743 return self._parse_sequence_properties() 1744 1745 return self.expression( 1746 exp.Property, 1747 this=key.to_dot() if isinstance(key, exp.Column) else key, 1748 value=self._parse_bitwise() or self._parse_var(any_token=True), 1749 ) 1750 1751 def _parse_stored(self) -> exp.FileFormatProperty: 1752 self._match(TokenType.ALIAS) 1753 1754 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1755 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1756 1757 return self.expression( 1758 exp.FileFormatProperty, 1759 this=( 1760 self.expression( 1761 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1762 ) 1763 if input_format or output_format 1764 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1765 ), 1766 ) 1767 1768 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1769 self._match(TokenType.EQ) 1770 self._match(TokenType.ALIAS) 1771 field = self._parse_field() 1772 if isinstance(field, exp.Identifier) and not field.quoted: 1773 field = exp.var(field) 1774 1775 return self.expression(exp_class, this=field, **kwargs) 1776 1777 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1778 properties = [] 1779 while True: 1780 if before: 1781 prop = self._parse_property_before() 1782 else: 1783 prop = self._parse_property() 1784 if not prop: 1785 break 1786 for p in ensure_list(prop): 1787 properties.append(p) 1788 1789 if properties: 1790 return self.expression(exp.Properties, expressions=properties) 1791 1792 return None 1793 1794 
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1795 return self.expression( 1796 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1797 ) 1798 1799 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1800 if self._index >= 2: 1801 pre_volatile_token = self._tokens[self._index - 2] 1802 else: 1803 pre_volatile_token = None 1804 1805 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1806 return exp.VolatileProperty() 1807 1808 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1809 1810 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1811 self._match_pair(TokenType.EQ, TokenType.ON) 1812 1813 prop = self.expression(exp.WithSystemVersioningProperty) 1814 if self._match(TokenType.L_PAREN): 1815 self._match_text_seq("HISTORY_TABLE", "=") 1816 prop.set("this", self._parse_table_parts()) 1817 1818 if self._match(TokenType.COMMA): 1819 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1820 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1821 1822 self._match_r_paren() 1823 1824 return prop 1825 1826 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1827 if self._match(TokenType.L_PAREN, advance=False): 1828 return self._parse_wrapped_properties() 1829 1830 if self._match_text_seq("JOURNAL"): 1831 return self._parse_withjournaltable() 1832 1833 if self._match_texts(self.VIEW_ATTRIBUTES): 1834 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1835 1836 if self._match_text_seq("DATA"): 1837 return self._parse_withdata(no=False) 1838 elif self._match_text_seq("NO", "DATA"): 1839 return self._parse_withdata(no=True) 1840 1841 if not self._next: 1842 return None 1843 1844 return self._parse_withisolatedloading() 1845 1846 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1847 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1848 self._match(TokenType.EQ) 1849 1850 user = self._parse_id_var() 1851 self._match(TokenType.PARAMETER) 1852 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1853 1854 if not user or not host: 1855 return None 1856 1857 return exp.DefinerProperty(this=f"{user}@{host}") 1858 1859 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1860 self._match(TokenType.TABLE) 1861 self._match(TokenType.EQ) 1862 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1863 1864 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1865 return self.expression(exp.LogProperty, no=no) 1866 1867 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1868 return self.expression(exp.JournalProperty, **kwargs) 1869 1870 def _parse_checksum(self) -> exp.ChecksumProperty: 1871 self._match(TokenType.EQ) 1872 1873 on = None 1874 if self._match(TokenType.ON): 1875 on = True 1876 elif self._match_text_seq("OFF"): 1877 on = False 1878 1879 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1880 1881 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1882 return self.expression( 1883 exp.Cluster, 1884 expressions=( 1885 self._parse_wrapped_csv(self._parse_ordered) 1886 if wrapped 1887 else self._parse_csv(self._parse_ordered) 1888 ), 1889 ) 1890 1891 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1892 self._match_text_seq("BY") 1893 1894 self._match_l_paren() 1895 expressions 
= self._parse_csv(self._parse_column) 1896 self._match_r_paren() 1897 1898 if self._match_text_seq("SORTED", "BY"): 1899 self._match_l_paren() 1900 sorted_by = self._parse_csv(self._parse_ordered) 1901 self._match_r_paren() 1902 else: 1903 sorted_by = None 1904 1905 self._match(TokenType.INTO) 1906 buckets = self._parse_number() 1907 self._match_text_seq("BUCKETS") 1908 1909 return self.expression( 1910 exp.ClusteredByProperty, 1911 expressions=expressions, 1912 sorted_by=sorted_by, 1913 buckets=buckets, 1914 ) 1915 1916 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1917 if not self._match_text_seq("GRANTS"): 1918 self._retreat(self._index - 1) 1919 return None 1920 1921 return self.expression(exp.CopyGrantsProperty) 1922 1923 def _parse_freespace(self) -> exp.FreespaceProperty: 1924 self._match(TokenType.EQ) 1925 return self.expression( 1926 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1927 ) 1928 1929 def _parse_mergeblockratio( 1930 self, no: bool = False, default: bool = False 1931 ) -> exp.MergeBlockRatioProperty: 1932 if self._match(TokenType.EQ): 1933 return self.expression( 1934 exp.MergeBlockRatioProperty, 1935 this=self._parse_number(), 1936 percent=self._match(TokenType.PERCENT), 1937 ) 1938 1939 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1940 1941 def _parse_datablocksize( 1942 self, 1943 default: t.Optional[bool] = None, 1944 minimum: t.Optional[bool] = None, 1945 maximum: t.Optional[bool] = None, 1946 ) -> exp.DataBlocksizeProperty: 1947 self._match(TokenType.EQ) 1948 size = self._parse_number() 1949 1950 units = None 1951 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1952 units = self._prev.text 1953 1954 return self.expression( 1955 exp.DataBlocksizeProperty, 1956 size=size, 1957 units=units, 1958 default=default, 1959 minimum=minimum, 1960 maximum=maximum, 1961 ) 1962 1963 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1964 self._match(TokenType.EQ) 1965 always = self._match_text_seq("ALWAYS") 1966 manual = self._match_text_seq("MANUAL") 1967 never = self._match_text_seq("NEVER") 1968 default = self._match_text_seq("DEFAULT") 1969 1970 autotemp = None 1971 if self._match_text_seq("AUTOTEMP"): 1972 autotemp = self._parse_schema() 1973 1974 return self.expression( 1975 exp.BlockCompressionProperty, 1976 always=always, 1977 manual=manual, 1978 never=never, 1979 default=default, 1980 autotemp=autotemp, 1981 ) 1982 1983 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1984 index = self._index 1985 no = self._match_text_seq("NO") 1986 concurrent = self._match_text_seq("CONCURRENT") 1987 1988 if not self._match_text_seq("ISOLATED", "LOADING"): 1989 self._retreat(index) 1990 return None 1991 1992 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1993 return self.expression( 1994 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1995 ) 1996 1997 def _parse_locking(self) -> exp.LockingProperty: 1998 if self._match(TokenType.TABLE): 1999 kind = "TABLE" 2000 elif self._match(TokenType.VIEW): 2001 kind = "VIEW" 2002 elif self._match(TokenType.ROW): 2003 kind = "ROW" 2004 elif self._match_text_seq("DATABASE"): 2005 kind = "DATABASE" 2006 else: 2007 kind = None 2008 2009 if kind in ("DATABASE", "TABLE", "VIEW"): 2010 this = self._parse_table_parts() 2011 else: 2012 this = None 2013 2014 if self._match(TokenType.FOR): 2015 for_or_in = "FOR" 2016 elif 
self._match(TokenType.IN): 2017 for_or_in = "IN" 2018 else: 2019 for_or_in = None 2020 2021 if self._match_text_seq("ACCESS"): 2022 lock_type = "ACCESS" 2023 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2024 lock_type = "EXCLUSIVE" 2025 elif self._match_text_seq("SHARE"): 2026 lock_type = "SHARE" 2027 elif self._match_text_seq("READ"): 2028 lock_type = "READ" 2029 elif self._match_text_seq("WRITE"): 2030 lock_type = "WRITE" 2031 elif self._match_text_seq("CHECKSUM"): 2032 lock_type = "CHECKSUM" 2033 else: 2034 lock_type = None 2035 2036 override = self._match_text_seq("OVERRIDE") 2037 2038 return self.expression( 2039 exp.LockingProperty, 2040 this=this, 2041 kind=kind, 2042 for_or_in=for_or_in, 2043 lock_type=lock_type, 2044 override=override, 2045 ) 2046 2047 def _parse_partition_by(self) -> t.List[exp.Expression]: 2048 if self._match(TokenType.PARTITION_BY): 2049 return self._parse_csv(self._parse_conjunction) 2050 return [] 2051 2052 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2053 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2054 if self._match_text_seq("MINVALUE"): 2055 return exp.var("MINVALUE") 2056 if self._match_text_seq("MAXVALUE"): 2057 return exp.var("MAXVALUE") 2058 return self._parse_bitwise() 2059 2060 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2061 expression = None 2062 from_expressions = None 2063 to_expressions = None 2064 2065 if self._match(TokenType.IN): 2066 this = self._parse_wrapped_csv(self._parse_bitwise) 2067 elif self._match(TokenType.FROM): 2068 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2069 self._match_text_seq("TO") 2070 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2071 elif self._match_text_seq("WITH", "(", "MODULUS"): 2072 this = self._parse_number() 2073 self._match_text_seq(",", "REMAINDER") 2074 expression = self._parse_number() 2075 self._match_r_paren() 2076 else: 2077 self.raise_error("Failed to parse partition bound spec.") 2078 2079 return self.expression( 2080 exp.PartitionBoundSpec, 2081 this=this, 2082 expression=expression, 2083 from_expressions=from_expressions, 2084 to_expressions=to_expressions, 2085 ) 2086 2087 # https://www.postgresql.org/docs/current/sql-createtable.html 2088 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2089 if not self._match_text_seq("OF"): 2090 self._retreat(self._index - 1) 2091 return None 2092 2093 this = self._parse_table(schema=True) 2094 2095 if self._match(TokenType.DEFAULT): 2096 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2097 elif self._match_text_seq("FOR", "VALUES"): 2098 expression = self._parse_partition_bound_spec() 2099 else: 2100 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2101 2102 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2103 2104 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2105 self._match(TokenType.EQ) 2106 return self.expression( 2107 exp.PartitionedByProperty, 2108 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2109 ) 2110 2111 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2112 if self._match_text_seq("AND", "STATISTICS"): 2113 statistics = True 2114 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2115 statistics = False 2116 else: 2117 statistics = None 2118 2119 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2120 2121 def _parse_contains_property(self) -> 
t.Optional[exp.SqlReadWriteProperty]: 2122 if self._match_text_seq("SQL"): 2123 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2124 return None 2125 2126 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2127 if self._match_text_seq("SQL", "DATA"): 2128 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2129 return None 2130 2131 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2132 if self._match_text_seq("PRIMARY", "INDEX"): 2133 return exp.NoPrimaryIndexProperty() 2134 if self._match_text_seq("SQL"): 2135 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2136 return None 2137 2138 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2139 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2140 return exp.OnCommitProperty() 2141 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2142 return exp.OnCommitProperty(delete=True) 2143 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2144 2145 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2146 if self._match_text_seq("SQL", "DATA"): 2147 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2148 return None 2149 2150 def _parse_distkey(self) -> exp.DistKeyProperty: 2151 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2152 2153 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2154 table = self._parse_table(schema=True) 2155 2156 options = [] 2157 while self._match_texts(("INCLUDING", "EXCLUDING")): 2158 this = self._prev.text.upper() 2159 2160 id_var = self._parse_id_var() 2161 if not id_var: 2162 return None 2163 2164 options.append( 2165 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2166 ) 2167 2168 return self.expression(exp.LikeProperty, this=table, expressions=options) 2169 2170 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2171 return self.expression( 2172 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2173 ) 2174 2175 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2176 self._match(TokenType.EQ) 2177 return self.expression( 2178 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2179 ) 2180 2181 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2182 self._match_text_seq("WITH", "CONNECTION") 2183 return self.expression( 2184 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2185 ) 2186 2187 def _parse_returns(self) -> exp.ReturnsProperty: 2188 value: t.Optional[exp.Expression] 2189 is_table = self._match(TokenType.TABLE) 2190 2191 if is_table: 2192 if self._match(TokenType.LT): 2193 value = self.expression( 2194 exp.Schema, 2195 this="TABLE", 2196 expressions=self._parse_csv(self._parse_struct_types), 2197 ) 2198 if not self._match(TokenType.GT): 2199 self.raise_error("Expecting >") 2200 else: 2201 value = self._parse_schema(exp.var("TABLE")) 2202 else: 2203 value = self._parse_types() 2204 2205 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2206 2207 def _parse_describe(self) -> exp.Describe: 2208 kind = self._match_set(self.CREATABLES) and self._prev.text 2209 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2210 if not self._match_set(self.ID_VAR_TOKENS, advance=False): 2211 style = None 2212 self._retreat(self._index - 1) 2213 
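# At this point, a word tentatively consumed as a style (e.g. FORMATTED) that
# wasn't followed by anything identifier-like has been rolled back, so
# _parse_table below can re-read it as the table name instead.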
this = self._parse_table(schema=True) 2214 properties = self._parse_properties() 2215 expressions = properties.expressions if properties else None 2216 return self.expression( 2217 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2218 ) 2219 2220 def _parse_insert(self) -> exp.Insert: 2221 comments = ensure_list(self._prev_comments) 2222 hint = self._parse_hint() 2223 overwrite = self._match(TokenType.OVERWRITE) 2224 ignore = self._match(TokenType.IGNORE) 2225 local = self._match_text_seq("LOCAL") 2226 alternative = None 2227 is_function = None 2228 2229 if self._match_text_seq("DIRECTORY"): 2230 this: t.Optional[exp.Expression] = self.expression( 2231 exp.Directory, 2232 this=self._parse_var_or_string(), 2233 local=local, 2234 row_format=self._parse_row_format(match_row=True), 2235 ) 2236 else: 2237 if self._match(TokenType.OR): 2238 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2239 2240 self._match(TokenType.INTO) 2241 comments += ensure_list(self._prev_comments) 2242 self._match(TokenType.TABLE) 2243 is_function = self._match(TokenType.FUNCTION) 2244 2245 this = ( 2246 self._parse_table(schema=True, parse_partition=True) 2247 if not is_function 2248 else self._parse_function() 2249 ) 2250 2251 returning = self._parse_returning() 2252 2253 return self.expression( 2254 exp.Insert, 2255 comments=comments, 2256 hint=hint, 2257 is_function=is_function, 2258 this=this, 2259 stored=self._match_text_seq("STORED") and self._parse_stored(), 2260 by_name=self._match_text_seq("BY", "NAME"), 2261 exists=self._parse_exists(), 2262 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2263 and self._parse_conjunction(), 2264 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2265 conflict=self._parse_on_conflict(), 2266 returning=returning or self._parse_returning(), 2267 overwrite=overwrite, 2268 alternative=alternative, 2269 ignore=ignore, 2270 ) 2271 2272 def _parse_kill(self) -> exp.Kill: 2273 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2274 2275 return self.expression( 2276 exp.Kill, 2277 this=self._parse_primary(), 2278 kind=kind, 2279 ) 2280 2281 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2282 conflict = self._match_text_seq("ON", "CONFLICT") 2283 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2284 2285 if not conflict and not duplicate: 2286 return None 2287 2288 conflict_keys = None 2289 constraint = None 2290 2291 if conflict: 2292 if self._match_text_seq("ON", "CONSTRAINT"): 2293 constraint = self._parse_id_var() 2294 elif self._match(TokenType.L_PAREN): 2295 conflict_keys = self._parse_csv(self._parse_id_var) 2296 self._match_r_paren() 2297 2298 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2299 if self._prev.token_type == TokenType.UPDATE: 2300 self._match(TokenType.SET) 2301 expressions = self._parse_csv(self._parse_equality) 2302 else: 2303 expressions = None 2304 2305 return self.expression( 2306 exp.OnConflict, 2307 duplicate=duplicate, 2308 expressions=expressions, 2309 action=action, 2310 conflict_keys=conflict_keys, 2311 constraint=constraint, 2312 ) 2313 2314 def _parse_returning(self) -> t.Optional[exp.Returning]: 2315 if not self._match(TokenType.RETURNING): 2316 return None 2317 return self.expression( 2318 exp.Returning, 2319 expressions=self._parse_csv(self._parse_expression), 2320 into=self._match(TokenType.INTO) and self._parse_table_part(), 2321 ) 2322 2323 def _parse_row(self) -> 
t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2324 if not self._match(TokenType.FORMAT): 2325 return None 2326 return self._parse_row_format() 2327 2328 def _parse_row_format( 2329 self, match_row: bool = False 2330 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2331 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2332 return None 2333 2334 if self._match_text_seq("SERDE"): 2335 this = self._parse_string() 2336 2337 serde_properties = None 2338 if self._match(TokenType.SERDE_PROPERTIES): 2339 serde_properties = self.expression( 2340 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2341 ) 2342 2343 return self.expression( 2344 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2345 ) 2346 2347 self._match_text_seq("DELIMITED") 2348 2349 kwargs = {} 2350 2351 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2352 kwargs["fields"] = self._parse_string() 2353 if self._match_text_seq("ESCAPED", "BY"): 2354 kwargs["escaped"] = self._parse_string() 2355 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2356 kwargs["collection_items"] = self._parse_string() 2357 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2358 kwargs["map_keys"] = self._parse_string() 2359 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2360 kwargs["lines"] = self._parse_string() 2361 if self._match_text_seq("NULL", "DEFINED", "AS"): 2362 kwargs["null"] = self._parse_string() 2363 2364 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2365 2366 def _parse_load(self) -> exp.LoadData | exp.Command: 2367 if self._match_text_seq("DATA"): 2368 local = self._match_text_seq("LOCAL") 2369 self._match_text_seq("INPATH") 2370 inpath = self._parse_string() 2371 overwrite = self._match(TokenType.OVERWRITE) 2372 self._match_pair(TokenType.INTO, TokenType.TABLE) 2373 2374 return self.expression( 2375 exp.LoadData, 2376 this=self._parse_table(schema=True), 2377 local=local, 2378 overwrite=overwrite, 2379 inpath=inpath, 2380 partition=self._parse_partition(), 2381 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2382 serde=self._match_text_seq("SERDE") and self._parse_string(), 2383 ) 2384 return self._parse_as_command(self._prev) 2385 2386 def _parse_delete(self) -> exp.Delete: 2387 # This handles MySQL's "Multiple-Table Syntax" 2388 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2389 tables = None 2390 comments = self._prev_comments 2391 if not self._match(TokenType.FROM, advance=False): 2392 tables = self._parse_csv(self._parse_table) or None 2393 2394 returning = self._parse_returning() 2395 2396 return self.expression( 2397 exp.Delete, 2398 comments=comments, 2399 tables=tables, 2400 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2401 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2402 where=self._parse_where(), 2403 returning=returning or self._parse_returning(), 2404 limit=self._parse_limit(), 2405 ) 2406 2407 def _parse_update(self) -> exp.Update: 2408 comments = self._prev_comments 2409 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2410 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2411 returning = self._parse_returning() 2412 return self.expression( 2413 exp.Update, 2414 comments=comments, 2415 **{ # type: ignore 2416 "this": this, 2417 "expressions": expressions, 2418 "from": 
self._parse_from(joins=True), 2419 "where": self._parse_where(), 2420 "returning": returning or self._parse_returning(), 2421 "order": self._parse_order(), 2422 "limit": self._parse_limit(), 2423 }, 2424 ) 2425 2426 def _parse_uncache(self) -> exp.Uncache: 2427 if not self._match(TokenType.TABLE): 2428 self.raise_error("Expecting TABLE after UNCACHE") 2429 2430 return self.expression( 2431 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2432 ) 2433 2434 def _parse_cache(self) -> exp.Cache: 2435 lazy = self._match_text_seq("LAZY") 2436 self._match(TokenType.TABLE) 2437 table = self._parse_table(schema=True) 2438 2439 options = [] 2440 if self._match_text_seq("OPTIONS"): 2441 self._match_l_paren() 2442 k = self._parse_string() 2443 self._match(TokenType.EQ) 2444 v = self._parse_string() 2445 options = [k, v] 2446 self._match_r_paren() 2447 2448 self._match(TokenType.ALIAS) 2449 return self.expression( 2450 exp.Cache, 2451 this=table, 2452 lazy=lazy, 2453 options=options, 2454 expression=self._parse_select(nested=True), 2455 ) 2456 2457 def _parse_partition(self) -> t.Optional[exp.Partition]: 2458 if not self._match(TokenType.PARTITION): 2459 return None 2460 2461 return self.expression( 2462 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2463 ) 2464 2465 def _parse_value(self) -> exp.Tuple: 2466 if self._match(TokenType.L_PAREN): 2467 expressions = self._parse_csv(self._parse_expression) 2468 self._match_r_paren() 2469 return self.expression(exp.Tuple, expressions=expressions) 2470 2471 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2472 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2473 2474 def _parse_projections(self) -> t.List[exp.Expression]: 2475 return self._parse_expressions() 2476 2477 def _parse_select( 2478 self, 2479 nested: bool = False, 2480 table: bool = False, 2481 parse_subquery_alias: bool = True, 2482 parse_set_operation: bool = True, 2483 ) -> t.Optional[exp.Expression]: 2484 cte = self._parse_with() 2485 2486 if cte: 2487 this = self._parse_statement() 2488 2489 if not this: 2490 self.raise_error("Failed to parse any statement following CTE") 2491 return cte 2492 2493 if "with" in this.arg_types: 2494 this.set("with", cte) 2495 else: 2496 self.raise_error(f"{this.key} does not support CTE") 2497 this = cte 2498 2499 return this 2500 2501 # duckdb supports leading with FROM x 2502 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2503 2504 if self._match(TokenType.SELECT): 2505 comments = self._prev_comments 2506 2507 hint = self._parse_hint() 2508 all_ = self._match(TokenType.ALL) 2509 distinct = self._match_set(self.DISTINCT_TOKENS) 2510 2511 kind = ( 2512 self._match(TokenType.ALIAS) 2513 and self._match_texts(("STRUCT", "VALUE")) 2514 and self._prev.text.upper() 2515 ) 2516 2517 if distinct: 2518 distinct = self.expression( 2519 exp.Distinct, 2520 on=self._parse_value() if self._match(TokenType.ON) else None, 2521 ) 2522 2523 if all_ and distinct: 2524 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2525 2526 limit = self._parse_limit(top=True) 2527 projections = self._parse_projections() 2528 2529 this = self.expression( 2530 exp.Select, 2531 kind=kind, 2532 hint=hint, 2533 distinct=distinct, 2534 expressions=projections, 2535 limit=limit, 2536 ) 2537 this.comments = comments 2538 2539 into = self._parse_into() 2540 if into: 2541 this.set("into", into) 2542 2543 if not from_: 2544 from_ = 
self._parse_from() 2545 2546 if from_: 2547 this.set("from", from_) 2548 2549 this = self._parse_query_modifiers(this) 2550 elif (table or nested) and self._match(TokenType.L_PAREN): 2551 if self._match(TokenType.PIVOT): 2552 this = self._parse_simplified_pivot() 2553 elif self._match(TokenType.FROM): 2554 this = exp.select("*").from_( 2555 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2556 ) 2557 else: 2558 this = ( 2559 self._parse_table() 2560 if table 2561 else self._parse_select(nested=True, parse_set_operation=False) 2562 ) 2563 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2564 2565 self._match_r_paren() 2566 2567 # We return early here so that the UNION isn't attached to the subquery by the 2568 # following call to _parse_set_operations, but instead becomes the parent node 2569 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2570 elif self._match(TokenType.VALUES, advance=False): 2571 this = self._parse_derived_table_values() 2572 elif from_: 2573 this = exp.select("*").from_(from_.this, copy=False) 2574 else: 2575 this = None 2576 2577 if parse_set_operation: 2578 return self._parse_set_operations(this) 2579 return this 2580 2581 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2582 if not skip_with_token and not self._match(TokenType.WITH): 2583 return None 2584 2585 comments = self._prev_comments 2586 recursive = self._match(TokenType.RECURSIVE) 2587 2588 expressions = [] 2589 while True: 2590 expressions.append(self._parse_cte()) 2591 2592 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2593 break 2594 else: 2595 self._match(TokenType.WITH) 2596 2597 return self.expression( 2598 exp.With, comments=comments, expressions=expressions, recursive=recursive 2599 ) 2600 2601 def _parse_cte(self) -> exp.CTE: 2602 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2603 if not alias or not alias.this: 2604 self.raise_error("Expected CTE to have alias") 2605 2606 self._match(TokenType.ALIAS) 2607 2608 if self._match_text_seq("NOT", "MATERIALIZED"): 2609 materialized = False 2610 elif self._match_text_seq("MATERIALIZED"): 2611 materialized = True 2612 else: 2613 materialized = None 2614 2615 return self.expression( 2616 exp.CTE, 2617 this=self._parse_wrapped(self._parse_statement), 2618 alias=alias, 2619 materialized=materialized, 2620 ) 2621 2622 def _parse_table_alias( 2623 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2624 ) -> t.Optional[exp.TableAlias]: 2625 any_token = self._match(TokenType.ALIAS) 2626 alias = ( 2627 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2628 or self._parse_string_as_identifier() 2629 ) 2630 2631 index = self._index 2632 if self._match(TokenType.L_PAREN): 2633 columns = self._parse_csv(self._parse_function_parameter) 2634 self._match_r_paren() if columns else self._retreat(index) 2635 else: 2636 columns = None 2637 2638 if not alias and not columns: 2639 return None 2640 2641 return self.expression(exp.TableAlias, this=alias, columns=columns) 2642 2643 def _parse_subquery( 2644 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2645 ) -> t.Optional[exp.Subquery]: 2646 if not this: 2647 return None 2648 2649 return self.expression( 2650 exp.Subquery, 2651 this=this, 2652 pivots=self._parse_pivots(), 2653 alias=self._parse_table_alias() if parse_alias else None, 2654 ) 2655 2656 def _implicit_unnests_to_explicit(self, this: E) -> E: 2657 from sqlglot.optimizer.normalize_identifiers 
import ( 2658 normalize_identifiers as _norm, 2659 ) 2660 2661 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2662 for i, join in enumerate(this.args.get("joins") or []): 2663 table = join.this 2664 normalized_table = table.copy() 2665 normalized_table.meta["maybe_column"] = True 2666 normalized_table = _norm(normalized_table, dialect=self.dialect) 2667 2668 if isinstance(table, exp.Table) and not join.args.get("on"): 2669 if normalized_table.parts[0].name in refs: 2670 table_as_column = table.to_column() 2671 unnest = exp.Unnest(expressions=[table_as_column]) 2672 2673 # Table.to_column creates a parent Alias node that we want to convert to 2674 # a TableAlias and attach to the Unnest, so it matches the parser's output 2675 if isinstance(table.args.get("alias"), exp.TableAlias): 2676 table_as_column.replace(table_as_column.this) 2677 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2678 2679 table.replace(unnest) 2680 2681 refs.add(normalized_table.alias_or_name) 2682 2683 return this 2684 2685 def _parse_query_modifiers( 2686 self, this: t.Optional[exp.Expression] 2687 ) -> t.Optional[exp.Expression]: 2688 if isinstance(this, (exp.Query, exp.Table)): 2689 for join in self._parse_joins(): 2690 this.append("joins", join) 2691 for lateral in iter(self._parse_lateral, None): 2692 this.append("laterals", lateral) 2693 2694 while True: 2695 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2696 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2697 key, expression = parser(self) 2698 2699 if expression: 2700 this.set(key, expression) 2701 if key == "limit": 2702 offset = expression.args.pop("offset", None) 2703 2704 if offset: 2705 offset = exp.Offset(expression=offset) 2706 this.set("offset", offset) 2707 2708 limit_by_expressions = expression.expressions 2709 expression.set("expressions", None) 2710 offset.set("expressions", limit_by_expressions) 2711 continue 2712 break 2713 2714 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2715 this = self._implicit_unnests_to_explicit(this) 2716 2717 return this 2718 2719 def _parse_hint(self) -> t.Optional[exp.Hint]: 2720 if self._match(TokenType.HINT): 2721 hints = [] 2722 for hint in iter( 2723 lambda: self._parse_csv( 2724 lambda: self._parse_function() or self._parse_var(upper=True) 2725 ), 2726 [], 2727 ): 2728 hints.extend(hint) 2729 2730 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2731 self.raise_error("Expected */ after HINT") 2732 2733 return self.expression(exp.Hint, expressions=hints) 2734 2735 return None 2736 2737 def _parse_into(self) -> t.Optional[exp.Into]: 2738 if not self._match(TokenType.INTO): 2739 return None 2740 2741 temp = self._match(TokenType.TEMPORARY) 2742 unlogged = self._match_text_seq("UNLOGGED") 2743 self._match(TokenType.TABLE) 2744 2745 return self.expression( 2746 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2747 ) 2748 2749 def _parse_from( 2750 self, joins: bool = False, skip_from_token: bool = False 2751 ) -> t.Optional[exp.From]: 2752 if not skip_from_token and not self._match(TokenType.FROM): 2753 return None 2754 2755 return self.expression( 2756 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2757 ) 2758 2759 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2760 return self.expression( 2761 exp.MatchRecognizeMeasure, 2762 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2763 
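# Illustrative example (not in the original source): in Snowflake/Oracle-style
# "MEASURES FINAL LAST(x.ts) AS end_ts", the leading FINAL (or RUNNING) keyword
# is captured as window_frame above, while the aliased aggregation becomes the
# measure expression parsed below.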
this=self._parse_expression(), 2764 ) 2765 2766 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2767 if not self._match(TokenType.MATCH_RECOGNIZE): 2768 return None 2769 2770 self._match_l_paren() 2771 2772 partition = self._parse_partition_by() 2773 order = self._parse_order() 2774 2775 measures = ( 2776 self._parse_csv(self._parse_match_recognize_measure) 2777 if self._match_text_seq("MEASURES") 2778 else None 2779 ) 2780 2781 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2782 rows = exp.var("ONE ROW PER MATCH") 2783 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2784 text = "ALL ROWS PER MATCH" 2785 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2786 text += " SHOW EMPTY MATCHES" 2787 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2788 text += " OMIT EMPTY MATCHES" 2789 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2790 text += " WITH UNMATCHED ROWS" 2791 rows = exp.var(text) 2792 else: 2793 rows = None 2794 2795 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2796 text = "AFTER MATCH SKIP" 2797 if self._match_text_seq("PAST", "LAST", "ROW"): 2798 text += " PAST LAST ROW" 2799 elif self._match_text_seq("TO", "NEXT", "ROW"): 2800 text += " TO NEXT ROW" 2801 elif self._match_text_seq("TO", "FIRST"): 2802 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2803 elif self._match_text_seq("TO", "LAST"): 2804 text += f" TO LAST {self._advance_any().text}" # type: ignore 2805 after = exp.var(text) 2806 else: 2807 after = None 2808 2809 if self._match_text_seq("PATTERN"): 2810 self._match_l_paren() 2811 2812 if not self._curr: 2813 self.raise_error("Expecting )", self._curr) 2814 2815 paren = 1 2816 start = self._curr 2817 2818 while self._curr and paren > 0: 2819 if self._curr.token_type == TokenType.L_PAREN: 2820 paren += 1 2821 if self._curr.token_type == TokenType.R_PAREN: 2822 paren -= 1 2823 2824 end = self._prev 2825 self._advance() 2826 2827 if paren > 0: 2828 self.raise_error("Expecting )", self._curr) 2829 2830 pattern = exp.var(self._find_sql(start, end)) 2831 else: 2832 pattern = None 2833 2834 define = ( 2835 self._parse_csv(self._parse_name_as_expression) 2836 if self._match_text_seq("DEFINE") 2837 else None 2838 ) 2839 2840 self._match_r_paren() 2841 2842 return self.expression( 2843 exp.MatchRecognize, 2844 partition_by=partition, 2845 order=order, 2846 measures=measures, 2847 rows=rows, 2848 after=after, 2849 pattern=pattern, 2850 define=define, 2851 alias=self._parse_table_alias(), 2852 ) 2853 2854 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2855 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2856 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2857 cross_apply = False 2858 2859 if cross_apply is not None: 2860 this = self._parse_select(table=True) 2861 view = None 2862 outer = None 2863 elif self._match(TokenType.LATERAL): 2864 this = self._parse_select(table=True) 2865 view = self._match(TokenType.VIEW) 2866 outer = self._match(TokenType.OUTER) 2867 else: 2868 return None 2869 2870 if not this: 2871 this = ( 2872 self._parse_unnest() 2873 or self._parse_function() 2874 or self._parse_id_var(any_token=False) 2875 ) 2876 2877 while self._match(TokenType.DOT): 2878 this = exp.Dot( 2879 this=this, 2880 expression=self._parse_function() or self._parse_id_var(any_token=False), 2881 ) 2882 2883 if view: 2884 table = self._parse_id_var(any_token=False) 2885 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2886 
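# Illustrative example (not in the original source): Hive's
# "LATERAL VIEW EXPLODE(arr) t AS col" takes this branch, with "t" parsed as the
# table name and "col" as its column list, so the alias built below is roughly
# TableAlias(this=t, columns=[col]).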
table_alias: t.Optional[exp.TableAlias] = self.expression( 2887 exp.TableAlias, this=table, columns=columns 2888 ) 2889 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2890 # We move the alias from the lateral's child node to the lateral itself 2891 table_alias = this.args["alias"].pop() 2892 else: 2893 table_alias = self._parse_table_alias() 2894 2895 return self.expression( 2896 exp.Lateral, 2897 this=this, 2898 view=view, 2899 outer=outer, 2900 alias=table_alias, 2901 cross_apply=cross_apply, 2902 ) 2903 2904 def _parse_join_parts( 2905 self, 2906 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2907 return ( 2908 self._match_set(self.JOIN_METHODS) and self._prev, 2909 self._match_set(self.JOIN_SIDES) and self._prev, 2910 self._match_set(self.JOIN_KINDS) and self._prev, 2911 ) 2912 2913 def _parse_join( 2914 self, skip_join_token: bool = False, parse_bracket: bool = False 2915 ) -> t.Optional[exp.Join]: 2916 if self._match(TokenType.COMMA): 2917 return self.expression(exp.Join, this=self._parse_table()) 2918 2919 index = self._index 2920 method, side, kind = self._parse_join_parts() 2921 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2922 join = self._match(TokenType.JOIN) 2923 2924 if not skip_join_token and not join: 2925 self._retreat(index) 2926 kind = None 2927 method = None 2928 side = None 2929 2930 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2931 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2932 2933 if not skip_join_token and not join and not outer_apply and not cross_apply: 2934 return None 2935 2936 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2937 2938 if method: 2939 kwargs["method"] = method.text 2940 if side: 2941 kwargs["side"] = side.text 2942 if kind: 2943 kwargs["kind"] = kind.text 2944 if hint: 2945 kwargs["hint"] = hint 2946 2947 if self._match(TokenType.MATCH_CONDITION): 2948 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2949 2950 if self._match(TokenType.ON): 2951 kwargs["on"] = self._parse_conjunction() 2952 elif self._match(TokenType.USING): 2953 kwargs["using"] = self._parse_wrapped_id_vars() 2954 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2955 kind and kind.token_type == TokenType.CROSS 2956 ): 2957 index = self._index 2958 joins: t.Optional[list] = list(self._parse_joins()) 2959 2960 if joins and self._match(TokenType.ON): 2961 kwargs["on"] = self._parse_conjunction() 2962 elif joins and self._match(TokenType.USING): 2963 kwargs["using"] = self._parse_wrapped_id_vars() 2964 else: 2965 joins = None 2966 self._retreat(index) 2967 2968 kwargs["this"].set("joins", joins if joins else None) 2969 2970 comments = [c for token in (method, side, kind) if token for c in token.comments] 2971 return self.expression(exp.Join, comments=comments, **kwargs) 2972 2973 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2974 this = self._parse_conjunction() 2975 2976 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2977 return this 2978 2979 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2980 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2981 2982 return this 2983 2984 def _parse_index_params(self) -> exp.IndexParameters: 2985 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2986 2987 if self._match(TokenType.L_PAREN, advance=False): 2988 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 2989 else: 2990 columns = None 2991 2992 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2993 partition_by = self._parse_partition_by() 2994 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2995 tablespace = ( 2996 self._parse_var(any_token=True) 2997 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2998 else None 2999 ) 3000 where = self._parse_where() 3001 3002 return self.expression( 3003 exp.IndexParameters, 3004 using=using, 3005 columns=columns, 3006 include=include, 3007 partition_by=partition_by, 3008 where=where, 3009 with_storage=with_storage, 3010 tablespace=tablespace, 3011 ) 3012 3013 def _parse_index( 3014 self, 3015 index: t.Optional[exp.Expression] = None, 3016 ) -> t.Optional[exp.Index]: 3017 if index: 3018 unique = None 3019 primary = None 3020 amp = None 3021 3022 self._match(TokenType.ON) 3023 self._match(TokenType.TABLE) # hive 3024 table = self._parse_table_parts(schema=True) 3025 else: 3026 unique = self._match(TokenType.UNIQUE) 3027 primary = self._match_text_seq("PRIMARY") 3028 amp = self._match_text_seq("AMP") 3029 3030 if not self._match(TokenType.INDEX): 3031 return None 3032 3033 index = self._parse_id_var() 3034 table = None 3035 3036 params = self._parse_index_params() 3037 3038 return self.expression( 3039 exp.Index, 3040 this=index, 3041 table=table, 3042 unique=unique, 3043 primary=primary, 3044 amp=amp, 3045 params=params, 3046 ) 3047 3048 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3049 hints: t.List[exp.Expression] = [] 3050 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3051 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3052 hints.append( 3053 self.expression( 3054 exp.WithTableHint, 3055 expressions=self._parse_csv( 3056 lambda: self._parse_function() or self._parse_var(any_token=True) 3057 ), 3058 ) 3059 ) 3060 self._match_r_paren() 3061 else: 3062 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3063 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3064 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3065 3066 self._match_texts(("INDEX", "KEY")) 3067 if self._match(TokenType.FOR): 3068 hint.set("target", self._advance_any() and self._prev.text.upper()) 3069 3070 hint.set("expressions", self._parse_wrapped_id_vars()) 3071 hints.append(hint) 3072 3073 return hints or None 3074 3075 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3076 return ( 3077 (not schema and self._parse_function(optional_parens=False)) 3078 or self._parse_id_var(any_token=False) 3079 or self._parse_string_as_identifier() 3080 or self._parse_placeholder() 3081 ) 3082 3083 def _parse_table_parts( 3084 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3085 ) -> exp.Table: 3086 catalog = None 3087 db = None 3088 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3089 3090 while self._match(TokenType.DOT): 3091 if catalog: 3092 # This allows nesting the table in arbitrarily many dot expressions if needed 3093 table = self.expression( 3094 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3095 ) 3096 else: 3097 catalog = db 3098 db = table 3099 # "" used for tsql FROM a..b case 3100 table = self._parse_table_part(schema=schema) or "" 3101 3102 if ( 3103 wildcard 3104 and self._is_connected() 3105 and (isinstance(table, exp.Identifier) or not 
table) 3106 and self._match(TokenType.STAR) 3107 ): 3108 if isinstance(table, exp.Identifier): 3109 table.args["this"] += "*" 3110 else: 3111 table = exp.Identifier(this="*") 3112 3113 # We bubble up comments from the Identifier to the Table 3114 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3115 3116 if is_db_reference: 3117 catalog = db 3118 db = table 3119 table = None 3120 3121 if not table and not is_db_reference: 3122 self.raise_error(f"Expected table name but got {self._curr}") 3123 if not db and is_db_reference: 3124 self.raise_error(f"Expected database name but got {self._curr}") 3125 3126 return self.expression( 3127 exp.Table, 3128 comments=comments, 3129 this=table, 3130 db=db, 3131 catalog=catalog, 3132 pivots=self._parse_pivots(), 3133 ) 3134 3135 def _parse_table( 3136 self, 3137 schema: bool = False, 3138 joins: bool = False, 3139 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3140 parse_bracket: bool = False, 3141 is_db_reference: bool = False, 3142 parse_partition: bool = False, 3143 ) -> t.Optional[exp.Expression]: 3144 lateral = self._parse_lateral() 3145 if lateral: 3146 return lateral 3147 3148 unnest = self._parse_unnest() 3149 if unnest: 3150 return unnest 3151 3152 values = self._parse_derived_table_values() 3153 if values: 3154 return values 3155 3156 subquery = self._parse_select(table=True) 3157 if subquery: 3158 if not subquery.args.get("pivots"): 3159 subquery.set("pivots", self._parse_pivots()) 3160 return subquery 3161 3162 bracket = parse_bracket and self._parse_bracket(None) 3163 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3164 3165 only = self._match(TokenType.ONLY) 3166 3167 this = t.cast( 3168 exp.Expression, 3169 bracket 3170 or self._parse_bracket( 3171 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3172 ), 3173 ) 3174 3175 if only: 3176 this.set("only", only) 3177 3178 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3179 self._match_text_seq("*") 3180 3181 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3182 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3183 this.set("partition", self._parse_partition()) 3184 3185 if schema: 3186 return self._parse_schema(this=this) 3187 3188 version = self._parse_version() 3189 3190 if version: 3191 this.set("version", version) 3192 3193 if self.dialect.ALIAS_POST_TABLESAMPLE: 3194 table_sample = self._parse_table_sample() 3195 3196 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3197 if alias: 3198 this.set("alias", alias) 3199 3200 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3201 return self.expression( 3202 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3203 ) 3204 3205 this.set("hints", self._parse_table_hints()) 3206 3207 if not this.args.get("pivots"): 3208 this.set("pivots", self._parse_pivots()) 3209 3210 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3211 table_sample = self._parse_table_sample() 3212 3213 if table_sample: 3214 table_sample.set("this", this) 3215 this = table_sample 3216 3217 if joins: 3218 for join in self._parse_joins(): 3219 this.append("joins", join) 3220 3221 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3222 this.set("ordinality", True) 3223 this.set("alias", self._parse_table_alias()) 3224 3225 return this 3226 3227 def _parse_version(self) -> t.Optional[exp.Version]: 3228 if 
self._match(TokenType.TIMESTAMP_SNAPSHOT): 3229 this = "TIMESTAMP" 3230 elif self._match(TokenType.VERSION_SNAPSHOT): 3231 this = "VERSION" 3232 else: 3233 return None 3234 3235 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3236 kind = self._prev.text.upper() 3237 start = self._parse_bitwise() 3238 self._match_texts(("TO", "AND")) 3239 end = self._parse_bitwise() 3240 expression: t.Optional[exp.Expression] = self.expression( 3241 exp.Tuple, expressions=[start, end] 3242 ) 3243 elif self._match_text_seq("CONTAINED", "IN"): 3244 kind = "CONTAINED IN" 3245 expression = self.expression( 3246 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3247 ) 3248 elif self._match(TokenType.ALL): 3249 kind = "ALL" 3250 expression = None 3251 else: 3252 self._match_text_seq("AS", "OF") 3253 kind = "AS OF" 3254 expression = self._parse_type() 3255 3256 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3257 3258 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3259 if not self._match(TokenType.UNNEST): 3260 return None 3261 3262 expressions = self._parse_wrapped_csv(self._parse_equality) 3263 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3264 3265 alias = self._parse_table_alias() if with_alias else None 3266 3267 if alias: 3268 if self.dialect.UNNEST_COLUMN_ONLY: 3269 if alias.args.get("columns"): 3270 self.raise_error("Unexpected extra column alias in unnest.") 3271 3272 alias.set("columns", [alias.this]) 3273 alias.set("this", None) 3274 3275 columns = alias.args.get("columns") or [] 3276 if offset and len(expressions) < len(columns): 3277 offset = columns.pop() 3278 3279 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3280 self._match(TokenType.ALIAS) 3281 offset = self._parse_id_var( 3282 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3283 ) or exp.to_identifier("offset") 3284 3285 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3286 3287 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3288 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3289 if not is_derived and not self._match_text_seq("VALUES"): 3290 return None 3291 3292 expressions = self._parse_csv(self._parse_value) 3293 alias = self._parse_table_alias() 3294 3295 if is_derived: 3296 self._match_r_paren() 3297 3298 return self.expression( 3299 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3300 ) 3301 3302 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3303 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3304 as_modifier and self._match_text_seq("USING", "SAMPLE") 3305 ): 3306 return None 3307 3308 bucket_numerator = None 3309 bucket_denominator = None 3310 bucket_field = None 3311 percent = None 3312 size = None 3313 seed = None 3314 3315 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3316 matched_l_paren = self._match(TokenType.L_PAREN) 3317 3318 if self.TABLESAMPLE_CSV: 3319 num = None 3320 expressions = self._parse_csv(self._parse_primary) 3321 else: 3322 expressions = None 3323 num = ( 3324 self._parse_factor() 3325 if self._match(TokenType.NUMBER, advance=False) 3326 else self._parse_primary() or self._parse_placeholder() 3327 ) 3328 3329 if self._match_text_seq("BUCKET"): 3330 bucket_numerator = self._parse_number() 3331 self._match_text_seq("OUT", "OF") 3332 bucket_denominator = self._parse_number() 3333
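# Illustrative example (not in the original source): Hive's
# "TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)" should reach this point with
# bucket_numerator=3 and bucket_denominator=16; the ON clause matched next
# fills in bucket_field.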
self._match(TokenType.ON) 3334 bucket_field = self._parse_field() 3335 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3336 percent = num 3337 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3338 size = num 3339 else: 3340 percent = num 3341 3342 if matched_l_paren: 3343 self._match_r_paren() 3344 3345 if self._match(TokenType.L_PAREN): 3346 method = self._parse_var(upper=True) 3347 seed = self._match(TokenType.COMMA) and self._parse_number() 3348 self._match_r_paren() 3349 elif self._match_texts(("SEED", "REPEATABLE")): 3350 seed = self._parse_wrapped(self._parse_number) 3351 3352 return self.expression( 3353 exp.TableSample, 3354 expressions=expressions, 3355 method=method, 3356 bucket_numerator=bucket_numerator, 3357 bucket_denominator=bucket_denominator, 3358 bucket_field=bucket_field, 3359 percent=percent, 3360 size=size, 3361 seed=seed, 3362 ) 3363 3364 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3365 return list(iter(self._parse_pivot, None)) or None 3366 3367 def _parse_joins(self) -> t.Iterator[exp.Join]: 3368 return iter(self._parse_join, None) 3369 3370 # https://duckdb.org/docs/sql/statements/pivot 3371 def _parse_simplified_pivot(self) -> exp.Pivot: 3372 def _parse_on() -> t.Optional[exp.Expression]: 3373 this = self._parse_bitwise() 3374 return self._parse_in(this) if self._match(TokenType.IN) else this 3375 3376 this = self._parse_table() 3377 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3378 using = self._match(TokenType.USING) and self._parse_csv( 3379 lambda: self._parse_alias(self._parse_function()) 3380 ) 3381 group = self._parse_group() 3382 return self.expression( 3383 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3384 ) 3385 3386 def _parse_pivot_in(self) -> exp.In: 3387 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3388 this = self._parse_conjunction() 3389 3390 self._match(TokenType.ALIAS) 3391 alias = self._parse_field() 3392 if alias: 3393 return self.expression(exp.PivotAlias, this=this, alias=alias) 3394 3395 return this 3396 3397 value = self._parse_column() 3398 3399 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3400 self.raise_error("Expecting IN (") 3401 3402 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3403 3404 self._match_r_paren() 3405 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3406 3407 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3408 index = self._index 3409 include_nulls = None 3410 3411 if self._match(TokenType.PIVOT): 3412 unpivot = False 3413 elif self._match(TokenType.UNPIVOT): 3414 unpivot = True 3415 3416 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3417 if self._match_text_seq("INCLUDE", "NULLS"): 3418 include_nulls = True 3419 elif self._match_text_seq("EXCLUDE", "NULLS"): 3420 include_nulls = False 3421 else: 3422 return None 3423 3424 expressions = [] 3425 3426 if not self._match(TokenType.L_PAREN): 3427 self._retreat(index) 3428 return None 3429 3430 if unpivot: 3431 expressions = self._parse_csv(self._parse_column) 3432 else: 3433 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3434 3435 if not expressions: 3436 self.raise_error("Failed to parse PIVOT's aggregation list") 3437 3438 if not self._match(TokenType.FOR): 3439 self.raise_error("Expecting FOR") 3440 3441 field = self._parse_pivot_in() 3442 3443 self._match_r_paren() 3444 3445 pivot = self.expression( 
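# Illustrative example (not in the original source): for
# "PIVOT(SUM(amount) FOR month IN ('jan' AS j))", the aggregation list parsed
# above ends up in expressions and the IN clause in field, while the
# unpivot/include_nulls args passed just below reflect the UNPIVOT variants
# handled earlier.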
3446 exp.Pivot, 3447 expressions=expressions, 3448 field=field, 3449 unpivot=unpivot, 3450 include_nulls=include_nulls, 3451 ) 3452 3453 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3454 pivot.set("alias", self._parse_table_alias()) 3455 3456 if not unpivot: 3457 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3458 3459 columns: t.List[exp.Expression] = [] 3460 for fld in pivot.args["field"].expressions: 3461 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3462 for name in names: 3463 if self.PREFIXED_PIVOT_COLUMNS: 3464 name = f"{name}_{field_name}" if name else field_name 3465 else: 3466 name = f"{field_name}_{name}" if name else field_name 3467 3468 columns.append(exp.to_identifier(name)) 3469 3470 pivot.set("columns", columns) 3471 3472 return pivot 3473 3474 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3475 return [agg.alias for agg in aggregations] 3476 3477 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3478 if not skip_where_token and not self._match(TokenType.PREWHERE): 3479 return None 3480 3481 return self.expression( 3482 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3483 ) 3484 3485 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3486 if not skip_where_token and not self._match(TokenType.WHERE): 3487 return None 3488 3489 return self.expression( 3490 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3491 ) 3492 3493 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3494 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3495 return None 3496 3497 elements: t.Dict[str, t.Any] = defaultdict(list) 3498 3499 if self._match(TokenType.ALL): 3500 elements["all"] = True 3501 elif self._match(TokenType.DISTINCT): 3502 elements["all"] = False 3503 3504 while True: 3505 expressions = self._parse_csv(self._parse_conjunction) 3506 if expressions: 3507 elements["expressions"].extend(expressions) 3508 3509 grouping_sets = self._parse_grouping_sets() 3510 if grouping_sets: 3511 elements["grouping_sets"].extend(grouping_sets) 3512 3513 rollup = None 3514 cube = None 3515 totals = None 3516 3517 index = self._index 3518 with_ = self._match(TokenType.WITH) 3519 if self._match(TokenType.ROLLUP): 3520 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3521 elements["rollup"].extend(ensure_list(rollup)) 3522 3523 if self._match(TokenType.CUBE): 3524 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3525 elements["cube"].extend(ensure_list(cube)) 3526 3527 if self._match_text_seq("TOTALS"): 3528 totals = True 3529 elements["totals"] = True # type: ignore 3530 3531 if not (grouping_sets or rollup or cube or totals): 3532 if with_: 3533 self._retreat(index) 3534 break 3535 3536 return self.expression(exp.Group, **elements) # type: ignore 3537 3538 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3539 if not self._match(TokenType.GROUPING_SETS): 3540 return None 3541 3542 return self._parse_wrapped_csv(self._parse_grouping_set) 3543 3544 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3545 if self._match(TokenType.L_PAREN): 3546 grouping_set = self._parse_csv(self._parse_column) 3547 self._match_r_paren() 3548 return self.expression(exp.Tuple, expressions=grouping_set) 3549 3550 return self._parse_column() 3551 3552 def _parse_having(self, 
skip_having_token: bool = False) -> t.Optional[exp.Having]: 3553 if not skip_having_token and not self._match(TokenType.HAVING): 3554 return None 3555 return self.expression(exp.Having, this=self._parse_conjunction()) 3556 3557 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3558 if not self._match(TokenType.QUALIFY): 3559 return None 3560 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3561 3562 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3563 if skip_start_token: 3564 start = None 3565 elif self._match(TokenType.START_WITH): 3566 start = self._parse_conjunction() 3567 else: 3568 return None 3569 3570 self._match(TokenType.CONNECT_BY) 3571 nocycle = self._match_text_seq("NOCYCLE") 3572 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3573 exp.Prior, this=self._parse_bitwise() 3574 ) 3575 connect = self._parse_conjunction() 3576 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3577 3578 if not start and self._match(TokenType.START_WITH): 3579 start = self._parse_conjunction() 3580 3581 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3582 3583 def _parse_name_as_expression(self) -> exp.Alias: 3584 return self.expression( 3585 exp.Alias, 3586 alias=self._parse_id_var(any_token=True), 3587 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3588 ) 3589 3590 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3591 if self._match_text_seq("INTERPOLATE"): 3592 return self._parse_wrapped_csv(self._parse_name_as_expression) 3593 return None 3594 3595 def _parse_order( 3596 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3597 ) -> t.Optional[exp.Expression]: 3598 siblings = None 3599 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3600 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3601 return this 3602 3603 siblings = True 3604 3605 return self.expression( 3606 exp.Order, 3607 this=this, 3608 expressions=self._parse_csv(self._parse_ordered), 3609 interpolate=self._parse_interpolate(), 3610 siblings=siblings, 3611 ) 3612 3613 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3614 if not self._match(token): 3615 return None 3616 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3617 3618 def _parse_ordered( 3619 self, parse_method: t.Optional[t.Callable] = None 3620 ) -> t.Optional[exp.Ordered]: 3621 this = parse_method() if parse_method else self._parse_conjunction() 3622 if not this: 3623 return None 3624 3625 asc = self._match(TokenType.ASC) 3626 desc = self._match(TokenType.DESC) or (asc and False) 3627 3628 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3629 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3630 3631 nulls_first = is_nulls_first or False 3632 explicitly_null_ordered = is_nulls_first or is_nulls_last 3633 3634 if ( 3635 not explicitly_null_ordered 3636 and ( 3637 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3638 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3639 ) 3640 and self.dialect.NULL_ORDERING != "nulls_are_last" 3641 ): 3642 nulls_first = True 3643 3644 if self._match_text_seq("WITH", "FILL"): 3645 with_fill = self.expression( 3646 exp.WithFill, 3647 **{ # type: ignore 3648 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3649 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3650 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 
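# Illustrative example (not in the original source): ClickHouse's
# "ORDER BY d WITH FILL FROM 1 TO 10 STEP 2" would fill the "from", "to"
# and "step" entries above.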
3651 }, 3652 ) 3653 else: 3654 with_fill = None 3655 3656 return self.expression( 3657 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3658 ) 3659 3660 def _parse_limit( 3661 self, 3662 this: t.Optional[exp.Expression] = None, 3663 top: bool = False, 3664 skip_limit_token: bool = False, 3665 ) -> t.Optional[exp.Expression]: 3666 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3667 comments = self._prev_comments 3668 if top: 3669 limit_paren = self._match(TokenType.L_PAREN) 3670 expression = self._parse_term() if limit_paren else self._parse_number() 3671 3672 if limit_paren: 3673 self._match_r_paren() 3674 else: 3675 expression = self._parse_term() 3676 3677 if self._match(TokenType.COMMA): 3678 offset = expression 3679 expression = self._parse_term() 3680 else: 3681 offset = None 3682 3683 limit_exp = self.expression( 3684 exp.Limit, 3685 this=this, 3686 expression=expression, 3687 offset=offset, 3688 comments=comments, 3689 expressions=self._parse_limit_by(), 3690 ) 3691 3692 return limit_exp 3693 3694 if self._match(TokenType.FETCH): 3695 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3696 direction = self._prev.text.upper() if direction else "FIRST" 3697 3698 count = self._parse_field(tokens=self.FETCH_TOKENS) 3699 percent = self._match(TokenType.PERCENT) 3700 3701 self._match_set((TokenType.ROW, TokenType.ROWS)) 3702 3703 only = self._match_text_seq("ONLY") 3704 with_ties = self._match_text_seq("WITH", "TIES") 3705 3706 if only and with_ties: 3707 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3708 3709 return self.expression( 3710 exp.Fetch, 3711 direction=direction, 3712 count=count, 3713 percent=percent, 3714 with_ties=with_ties, 3715 ) 3716 3717 return this 3718 3719 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3720 if not self._match(TokenType.OFFSET): 3721 return this 3722 3723 count = self._parse_term() 3724 self._match_set((TokenType.ROW, TokenType.ROWS)) 3725 3726 return self.expression( 3727 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3728 ) 3729 3730 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3731 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3732 3733 def _parse_locks(self) -> t.List[exp.Lock]: 3734 locks = [] 3735 while True: 3736 if self._match_text_seq("FOR", "UPDATE"): 3737 update = True 3738 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3739 "LOCK", "IN", "SHARE", "MODE" 3740 ): 3741 update = False 3742 else: 3743 break 3744 3745 expressions = None 3746 if self._match_text_seq("OF"): 3747 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3748 3749 wait: t.Optional[bool | exp.Expression] = None 3750 if self._match_text_seq("NOWAIT"): 3751 wait = True 3752 elif self._match_text_seq("WAIT"): 3753 wait = self._parse_primary() 3754 elif self._match_text_seq("SKIP", "LOCKED"): 3755 wait = False 3756 3757 locks.append( 3758 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3759 ) 3760 3761 return locks 3762 3763 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3764 while this and self._match_set(self.SET_OPERATIONS): 3765 token_type = self._prev.token_type 3766 3767 if token_type == TokenType.UNION: 3768 operation = exp.Union 3769 elif token_type == TokenType.EXCEPT: 3770 operation = exp.Except 3771 else: 3772 operation = 
exp.Intersect 3773 3774 comments = self._prev.comments 3775 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3776 by_name = self._match_text_seq("BY", "NAME") 3777 expression = self._parse_select(nested=True, parse_set_operation=False) 3778 3779 this = self.expression( 3780 operation, 3781 comments=comments, 3782 this=this, 3783 distinct=distinct, 3784 by_name=by_name, 3785 expression=expression, 3786 ) 3787 3788 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3789 expression = this.expression 3790 3791 if expression: 3792 for arg in self.UNION_MODIFIERS: 3793 expr = expression.args.get(arg) 3794 if expr: 3795 this.set(arg, expr.pop()) 3796 3797 return this 3798 3799 def _parse_expression(self) -> t.Optional[exp.Expression]: 3800 return self._parse_alias(self._parse_conjunction()) 3801 3802 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3803 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3804 3805 def _parse_equality(self) -> t.Optional[exp.Expression]: 3806 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3807 3808 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3809 return self._parse_tokens(self._parse_range, self.COMPARISON) 3810 3811 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3812 this = this or self._parse_bitwise() 3813 negate = self._match(TokenType.NOT) 3814 3815 if self._match_set(self.RANGE_PARSERS): 3816 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3817 if not expression: 3818 return this 3819 3820 this = expression 3821 elif self._match(TokenType.ISNULL): 3822 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3823 3824 # Postgres supports ISNULL and NOTNULL for conditions. 
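# Illustrative example (not in the original source): "x NOTNULL" is folded
# below into Not(Is(this=x, expression=Null())), i.e. the same tree produced
# by "x IS NOT NULL".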
3825 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3826 if self._match(TokenType.NOTNULL): 3827 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3828 this = self.expression(exp.Not, this=this) 3829 3830 if negate: 3831 this = self.expression(exp.Not, this=this) 3832 3833 if self._match(TokenType.IS): 3834 this = self._parse_is(this) 3835 3836 return this 3837 3838 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3839 index = self._index - 1 3840 negate = self._match(TokenType.NOT) 3841 3842 if self._match_text_seq("DISTINCT", "FROM"): 3843 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3844 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3845 3846 expression = self._parse_null() or self._parse_boolean() 3847 if not expression: 3848 self._retreat(index) 3849 return None 3850 3851 this = self.expression(exp.Is, this=this, expression=expression) 3852 return self.expression(exp.Not, this=this) if negate else this 3853 3854 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3855 unnest = self._parse_unnest(with_alias=False) 3856 if unnest: 3857 this = self.expression(exp.In, this=this, unnest=unnest) 3858 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3859 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3860 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3861 3862 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3863 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3864 else: 3865 this = self.expression(exp.In, this=this, expressions=expressions) 3866 3867 if matched_l_paren: 3868 self._match_r_paren(this) 3869 elif not self._match(TokenType.R_BRACKET, expression=this): 3870 self.raise_error("Expecting ]") 3871 else: 3872 this = self.expression(exp.In, this=this, field=self._parse_field()) 3873 3874 return this 3875 3876 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3877 low = self._parse_bitwise() 3878 self._match(TokenType.AND) 3879 high = self._parse_bitwise() 3880 return self.expression(exp.Between, this=this, low=low, high=high) 3881 3882 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3883 if not self._match(TokenType.ESCAPE): 3884 return this 3885 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3886 3887 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3888 index = self._index 3889 3890 if not self._match(TokenType.INTERVAL) and match_interval: 3891 return None 3892 3893 if self._match(TokenType.STRING, advance=False): 3894 this = self._parse_primary() 3895 else: 3896 this = self._parse_term() 3897 3898 if not this or ( 3899 isinstance(this, exp.Column) 3900 and not this.table 3901 and not this.this.quoted 3902 and this.name.upper() == "IS" 3903 ): 3904 self._retreat(index) 3905 return None 3906 3907 unit = self._parse_function() or ( 3908 not self._match(TokenType.ALIAS, advance=False) 3909 and self._parse_var(any_token=True, upper=True) 3910 ) 3911 3912 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3913 # each INTERVAL expression into this canonical form so it's easy to transpile 3914 if this and this.is_number: 3915 this = exp.Literal.string(this.name) 3916 elif this and this.is_string: 3917 parts = this.name.split() 3918 3919 if len(parts) == 2: 3920 if unit: 
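# Illustrative example (not in the original source): for INTERVAL '5 day' the
# string literal splits into parts == ['5', 'day'], which is canonicalized
# below into roughly Interval(this=Literal('5'), unit=Var(this='DAY')).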
3921 # This is not actually a unit, it's something else (e.g. a "window side") 3922 unit = None 3923 self._retreat(self._index - 1) 3924 3925 this = exp.Literal.string(parts[0]) 3926 unit = self.expression(exp.Var, this=parts[1].upper()) 3927 3928 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3929 unit = self.expression( 3930 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3931 ) 3932 3933 return self.expression(exp.Interval, this=this, unit=unit) 3934 3935 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3936 this = self._parse_term() 3937 3938 while True: 3939 if self._match_set(self.BITWISE): 3940 this = self.expression( 3941 self.BITWISE[self._prev.token_type], 3942 this=this, 3943 expression=self._parse_term(), 3944 ) 3945 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3946 this = self.expression( 3947 exp.DPipe, 3948 this=this, 3949 expression=self._parse_term(), 3950 safe=not self.dialect.STRICT_STRING_CONCAT, 3951 ) 3952 elif self._match(TokenType.DQMARK): 3953 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3954 elif self._match_pair(TokenType.LT, TokenType.LT): 3955 this = self.expression( 3956 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3957 ) 3958 elif self._match_pair(TokenType.GT, TokenType.GT): 3959 this = self.expression( 3960 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3961 ) 3962 else: 3963 break 3964 3965 return this 3966 3967 def _parse_term(self) -> t.Optional[exp.Expression]: 3968 return self._parse_tokens(self._parse_factor, self.TERM) 3969 3970 def _parse_factor(self) -> t.Optional[exp.Expression]: 3971 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3972 this = parse_method() 3973 3974 while self._match_set(self.FACTOR): 3975 this = self.expression( 3976 self.FACTOR[self._prev.token_type], 3977 this=this, 3978 comments=self._prev_comments, 3979 expression=parse_method(), 3980 ) 3981 if isinstance(this, exp.Div): 3982 this.args["typed"] = self.dialect.TYPED_DIVISION 3983 this.args["safe"] = self.dialect.SAFE_DIVISION 3984 3985 return this 3986 3987 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3988 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3989 3990 def _parse_unary(self) -> t.Optional[exp.Expression]: 3991 if self._match_set(self.UNARY_PARSERS): 3992 return self.UNARY_PARSERS[self._prev.token_type](self) 3993 return self._parse_at_time_zone(self._parse_type()) 3994 3995 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3996 interval = parse_interval and self._parse_interval() 3997 if interval: 3998 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3999 while True: 4000 index = self._index 4001 self._match(TokenType.PLUS) 4002 4003 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4004 self._retreat(index) 4005 break 4006 4007 interval = self.expression( # type: ignore 4008 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4009 ) 4010 4011 return interval 4012 4013 index = self._index 4014 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4015 this = self._parse_column() 4016 4017 if data_type: 4018 if isinstance(this, exp.Literal): 4019 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4020 if parser: 4021 return parser(self, this, data_type) 4022 return self.expression(exp.Cast, this=this, to=data_type) 4023 if not data_type.expressions: 4024 self._retreat(index) 4025 return self._parse_column() 4026 return self._parse_column_ops(data_type) 4027 4028 return this and self._parse_column_ops(this) 4029 4030 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4031 this = self._parse_type() 4032 if not this: 4033 return None 4034 4035 if isinstance(this, exp.Column) and not this.table: 4036 this = exp.var(this.name.upper()) 4037 4038 return self.expression( 4039 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4040 ) 4041 4042 def _parse_types( 4043 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4044 ) -> t.Optional[exp.Expression]: 4045 index = self._index 4046 4047 prefix = self._match_text_seq("SYSUDTLIB", ".") 4048 4049 if not self._match_set(self.TYPE_TOKENS): 4050 identifier = allow_identifiers and self._parse_id_var( 4051 any_token=False, tokens=(TokenType.VAR,) 4052 ) 4053 if identifier: 4054 tokens = self.dialect.tokenize(identifier.name) 4055 4056 if len(tokens) != 1: 4057 self.raise_error("Unexpected identifier", self._prev) 4058 4059 if tokens[0].token_type in self.TYPE_TOKENS: 4060 self._prev = tokens[0] 4061 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4062 type_name = identifier.name 4063 4064 while self._match(TokenType.DOT): 4065 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4066 4067 return exp.DataType.build(type_name, udt=True) 4068 else: 4069 self._retreat(self._index - 1) 4070 return None 4071 else: 4072 return None 4073 4074 type_token = self._prev.token_type 4075 4076 if type_token == TokenType.PSEUDO_TYPE: 4077 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4078 4079 if type_token == TokenType.OBJECT_IDENTIFIER: 4080 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4081 4082 nested = type_token in self.NESTED_TYPE_TOKENS 4083 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4084 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4085 expressions = None 4086 maybe_func = False 4087 4088 if self._match(TokenType.L_PAREN): 4089 if is_struct: 4090 expressions = self._parse_csv(self._parse_struct_types) 4091 elif nested: 4092 expressions = self._parse_csv( 4093 lambda: self._parse_types( 4094 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4095 ) 4096 ) 4097 elif type_token in self.ENUM_TYPE_TOKENS: 4098 expressions = self._parse_csv(self._parse_equality) 4099 elif is_aggregate: 4100 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4101 any_token=False, tokens=(TokenType.VAR,) 4102 ) 4103 if not func_or_ident or not self._match(TokenType.COMMA): 4104 return None 4105 expressions = 
self._parse_csv( 4106 lambda: self._parse_types( 4107 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4108 ) 4109 ) 4110 expressions.insert(0, func_or_ident) 4111 else: 4112 expressions = self._parse_csv(self._parse_type_size) 4113 4114 if not expressions or not self._match(TokenType.R_PAREN): 4115 self._retreat(index) 4116 return None 4117 4118 maybe_func = True 4119 4120 this: t.Optional[exp.Expression] = None 4121 values: t.Optional[t.List[exp.Expression]] = None 4122 4123 if nested and self._match(TokenType.LT): 4124 if is_struct: 4125 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4126 else: 4127 expressions = self._parse_csv( 4128 lambda: self._parse_types( 4129 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4130 ) 4131 ) 4132 4133 if not self._match(TokenType.GT): 4134 self.raise_error("Expecting >") 4135 4136 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4137 values = self._parse_csv(self._parse_conjunction) 4138 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4139 4140 if type_token in self.TIMESTAMPS: 4141 if self._match_text_seq("WITH", "TIME", "ZONE"): 4142 maybe_func = False 4143 tz_type = ( 4144 exp.DataType.Type.TIMETZ 4145 if type_token in self.TIMES 4146 else exp.DataType.Type.TIMESTAMPTZ 4147 ) 4148 this = exp.DataType(this=tz_type, expressions=expressions) 4149 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4150 maybe_func = False 4151 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4152 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4153 maybe_func = False 4154 elif type_token == TokenType.INTERVAL: 4155 unit = self._parse_var(upper=True) 4156 if unit: 4157 if self._match_text_seq("TO"): 4158 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4159 4160 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4161 else: 4162 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4163 4164 if maybe_func and check_func: 4165 index2 = self._index 4166 peek = self._parse_string() 4167 4168 if not peek: 4169 self._retreat(index) 4170 return None 4171 4172 self._retreat(index2) 4173 4174 if not this: 4175 if self._match_text_seq("UNSIGNED"): 4176 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4177 if not unsigned_type_token: 4178 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4179 4180 type_token = unsigned_type_token or type_token 4181 4182 this = exp.DataType( 4183 this=exp.DataType.Type[type_token.value], 4184 expressions=expressions, 4185 nested=nested, 4186 values=values, 4187 prefix=prefix, 4188 ) 4189 4190 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4191 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4192 4193 return this 4194 4195 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4196 index = self._index 4197 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4198 self._match(TokenType.COLON) 4199 column_def = self._parse_column_def(this) 4200 4201 if type_required and ( 4202 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4203 ): 4204 self._retreat(index) 4205 return self._parse_types() 4206 4207 return column_def 4208 4209 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4210 if not 
self._match_text_seq("AT", "TIME", "ZONE"): 4211 return this 4212 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4213 4214 def _parse_column(self) -> t.Optional[exp.Expression]: 4215 this = self._parse_column_reference() 4216 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4217 4218 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4219 this = self._parse_field() 4220 if ( 4221 not this 4222 and self._match(TokenType.VALUES, advance=False) 4223 and self.VALUES_FOLLOWED_BY_PAREN 4224 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4225 ): 4226 this = self._parse_id_var() 4227 4228 if isinstance(this, exp.Identifier): 4229 # We bubble up comments from the Identifier to the Column 4230 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4231 4232 return this 4233 4234 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4235 this = self._parse_bracket(this) 4236 4237 while self._match_set(self.COLUMN_OPERATORS): 4238 op_token = self._prev.token_type 4239 op = self.COLUMN_OPERATORS.get(op_token) 4240 4241 if op_token == TokenType.DCOLON: 4242 field = self._parse_types() 4243 if not field: 4244 self.raise_error("Expected type") 4245 elif op and self._curr: 4246 field = self._parse_column_reference() 4247 else: 4248 field = self._parse_field(any_token=True, anonymous_func=True) 4249 4250 if isinstance(field, exp.Func) and this: 4251 # bigquery allows function calls like x.y.count(...) 4252 # SAFE.SUBSTR(...) 4253 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4254 this = exp.replace_tree( 4255 this, 4256 lambda n: ( 4257 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4258 if n.table 4259 else n.this 4260 ) 4261 if isinstance(n, exp.Column) 4262 else n, 4263 ) 4264 4265 if op: 4266 this = op(self, this, field) 4267 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4268 this = self.expression( 4269 exp.Column, 4270 this=field, 4271 table=this.this, 4272 db=this.args.get("table"), 4273 catalog=this.args.get("db"), 4274 ) 4275 else: 4276 this = self.expression(exp.Dot, this=this, expression=field) 4277 this = self._parse_bracket(this) 4278 return this 4279 4280 def _parse_primary(self) -> t.Optional[exp.Expression]: 4281 if self._match_set(self.PRIMARY_PARSERS): 4282 token_type = self._prev.token_type 4283 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4284 4285 if token_type == TokenType.STRING: 4286 expressions = [primary] 4287 while self._match(TokenType.STRING): 4288 expressions.append(exp.Literal.string(self._prev.text)) 4289 4290 if len(expressions) > 1: 4291 return self.expression(exp.Concat, expressions=expressions) 4292 4293 return primary 4294 4295 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4296 return exp.Literal.number(f"0.{self._prev.text}") 4297 4298 if self._match(TokenType.L_PAREN): 4299 comments = self._prev_comments 4300 query = self._parse_select() 4301 4302 if query: 4303 expressions = [query] 4304 else: 4305 expressions = self._parse_expressions() 4306 4307 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4308 4309 if isinstance(this, exp.UNWRAPPED_QUERIES): 4310 this = self._parse_set_operations( 4311 self._parse_subquery(this=this, parse_alias=False) 4312 ) 4313 elif isinstance(this, exp.Subquery): 4314 this = self._parse_subquery( 4315 this=self._parse_set_operations(this), parse_alias=False 

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
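
    # Illustrative sketch, not part of the library: the {fn ...} handling above
    # accepts ODBC escape syntax, so e.g. (assuming sqlglot is importable)
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT {fn UCASE(x)}")
    #
    # parses the braced call like a regular function invocation.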
"dialect" in function.__code__.co_varnames: 4436 func = function(args, dialect=self.dialect) 4437 else: 4438 func = function(args) 4439 4440 func = self.validate_expression(func, args) 4441 if not self.dialect.NORMALIZE_FUNCTIONS: 4442 func.meta["name"] = this 4443 4444 this = func 4445 else: 4446 if token_type == TokenType.IDENTIFIER: 4447 this = exp.Identifier(this=this, quoted=True) 4448 this = self.expression(exp.Anonymous, this=this, expressions=args) 4449 4450 if isinstance(this, exp.Expression): 4451 this.add_comments(comments) 4452 4453 self._match_r_paren(this) 4454 return self._parse_window(this) 4455 4456 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4457 transformed = [] 4458 4459 for e in expressions: 4460 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4461 if isinstance(e, exp.Alias): 4462 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4463 4464 if not isinstance(e, exp.PropertyEQ): 4465 e = self.expression( 4466 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4467 ) 4468 4469 if isinstance(e.this, exp.Column): 4470 e.this.replace(e.this.this) 4471 4472 transformed.append(e) 4473 4474 return transformed 4475 4476 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4477 return self._parse_column_def(self._parse_id_var()) 4478 4479 def _parse_user_defined_function( 4480 self, kind: t.Optional[TokenType] = None 4481 ) -> t.Optional[exp.Expression]: 4482 this = self._parse_id_var() 4483 4484 while self._match(TokenType.DOT): 4485 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4486 4487 if not self._match(TokenType.L_PAREN): 4488 return this 4489 4490 expressions = self._parse_csv(self._parse_function_parameter) 4491 self._match_r_paren() 4492 return self.expression( 4493 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4494 ) 4495 4496 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4497 literal = self._parse_primary() 4498 if literal: 4499 return self.expression(exp.Introducer, this=token.text, expression=literal) 4500 4501 return self.expression(exp.Identifier, this=token.text) 4502 4503 def _parse_session_parameter(self) -> exp.SessionParameter: 4504 kind = None 4505 this = self._parse_id_var() or self._parse_primary() 4506 4507 if this and self._match(TokenType.DOT): 4508 kind = this.name 4509 this = self._parse_var() or self._parse_primary() 4510 4511 return self.expression(exp.SessionParameter, this=this, kind=kind) 4512 4513 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4514 index = self._index 4515 4516 if self._match(TokenType.L_PAREN): 4517 expressions = t.cast( 4518 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4519 ) 4520 4521 if not self._match(TokenType.R_PAREN): 4522 self._retreat(index) 4523 else: 4524 expressions = [self._parse_id_var()] 4525 4526 if self._match_set(self.LAMBDAS): 4527 return self.LAMBDAS[self._prev.token_type](self, expressions) 4528 4529 self._retreat(index) 4530 4531 this: t.Optional[exp.Expression] 4532 4533 if self._match(TokenType.DISTINCT): 4534 this = self.expression( 4535 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4536 ) 4537 else: 4538 this = self._parse_select_or_expression(alias=alias) 4539 4540 return self._parse_limit( 4541 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4542 ) 4543 4544 def _parse_schema(self, this: 

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
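
    # Illustrative sketch, not part of the library: _parse_schema and _parse_column_def
    # drive DDL column lists, e.g. (assuming sqlglot is importable)
    #
    #     import sqlglot
    #     ddl = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL, name TEXT)")
    #     # ddl.this is an exp.Schema whose expressions are exp.ColumnDef nodes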

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
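
    # Illustrative sketch, not part of the library: _parse_foreign_key collects the
    # referential actions into keyword args, e.g. for
    #
    #     FOREIGN KEY (a) REFERENCES t(b) ON DELETE CASCADE ON UPDATE SET NULL
    #
    # options ends up as {"delete": "CASCADE", "update": "SET NULL"} and is splatted
    # into the exp.ForeignKey node above.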

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
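
    # Illustrative sketch, not part of the library: the apply_index_offset call above
    # normalizes subscripts across dialects. With INDEX_OFFSET = 1 (e.g. Presto),
    # x[1] is stored internally as offset 0, so transpiling to a 0-based dialect
    # should shift the literal accordingly (assuming sqlglot is importable):
    #
    #     import sqlglot
    #     sqlglot.transpile("SELECT x[1]", read="presto", write="spark")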

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
5081 """ 5082 args = self._parse_csv(self._parse_conjunction) 5083 5084 if len(args) < 3: 5085 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5086 5087 expression, *expressions = args 5088 if not expression: 5089 return None 5090 5091 ifs = [] 5092 for search, result in zip(expressions[::2], expressions[1::2]): 5093 if not search or not result: 5094 return None 5095 5096 if isinstance(search, exp.Literal): 5097 ifs.append( 5098 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5099 ) 5100 elif isinstance(search, exp.Null): 5101 ifs.append( 5102 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5103 ) 5104 else: 5105 cond = exp.or_( 5106 exp.EQ(this=expression.copy(), expression=search), 5107 exp.and_( 5108 exp.Is(this=expression.copy(), expression=exp.Null()), 5109 exp.Is(this=search.copy(), expression=exp.Null()), 5110 copy=False, 5111 ), 5112 copy=False, 5113 ) 5114 ifs.append(exp.If(this=cond, true=result)) 5115 5116 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5117 5118 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5119 self._match_text_seq("KEY") 5120 key = self._parse_column() 5121 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5122 self._match_text_seq("VALUE") 5123 value = self._parse_bitwise() 5124 5125 if not key and not value: 5126 return None 5127 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5128 5129 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5130 if not this or not self._match_text_seq("FORMAT", "JSON"): 5131 return this 5132 5133 return self.expression(exp.FormatJson, this=this) 5134 5135 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5136 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5137 for value in values: 5138 if self._match_text_seq(value, "ON", on): 5139 return f"{value} ON {on}" 5140 5141 return None 5142 5143 @t.overload 5144 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5145 5146 @t.overload 5147 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
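
    # Illustrative sketch, not part of the library: _parse_json_object covers the
    # standard syntax with its optional clauses, e.g.
    #
    #     JSON_OBJECT('a' VALUE 1 NULL ON NULL WITH UNIQUE KEYS RETURNING CLOB)
    #
    # where the key/value pairs come from _parse_json_key_value, and the NULL and
    # UNIQUE KEYS handling from _parse_on_handling and the WITH/WITHOUT branch above.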

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
self._match_text_seq("RESPECT", "NULLS"): 5356 return self.expression(exp.RespectNulls, this=this) 5357 return this 5358 5359 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5360 if self._match(TokenType.HAVING): 5361 self._match_texts(("MAX", "MIN")) 5362 max = self._prev.text.upper() != "MIN" 5363 return self.expression( 5364 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5365 ) 5366 5367 return this 5368 5369 def _parse_window( 5370 self, this: t.Optional[exp.Expression], alias: bool = False 5371 ) -> t.Optional[exp.Expression]: 5372 func = this 5373 comments = func.comments if isinstance(func, exp.Expression) else None 5374 5375 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5376 self._match(TokenType.WHERE) 5377 this = self.expression( 5378 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5379 ) 5380 self._match_r_paren() 5381 5382 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5383 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5384 if self._match_text_seq("WITHIN", "GROUP"): 5385 order = self._parse_wrapped(self._parse_order) 5386 this = self.expression(exp.WithinGroup, this=this, expression=order) 5387 5388 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5389 # Some dialects choose to implement and some do not. 5390 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5391 5392 # There is some code above in _parse_lambda that handles 5393 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5394 5395 # The below changes handle 5396 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5397 5398 # Oracle allows both formats 5399 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5400 # and Snowflake chose to do the same for familiarity 5401 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5402 if isinstance(this, exp.AggFunc): 5403 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5404 5405 if ignore_respect and ignore_respect is not this: 5406 ignore_respect.replace(ignore_respect.this) 5407 this = self.expression(ignore_respect.__class__, this=this) 5408 5409 this = self._parse_respect_or_ignore_nulls(this) 5410 5411 # bigquery select from window x AS (partition by ...) 

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
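
    # Illustrative sketch, not part of the library: _parse_window_spec returns the
    # two frame endpoints as plain dicts, so a frame like
    #
    #     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    #
    # yields roughly {"value": "UNBOUNDED", "side": "PRECEDING"} and
    # {"value": "CURRENT ROW", "side": None}, which _parse_window folds into an
    # exp.WindowSpec above.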

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
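
    # Illustrative sketch, not part of the library: _parse_csv and _parse_wrapped_csv
    # are the small combinators most list-shaped grammar rules are built from, e.g.
    #
    #     self._parse_wrapped_csv(self._parse_id_var)
    #
    # parses "(a, b, c)" into a list of three exp.Identifier nodes, raising
    # "Expecting (" if the opening paren is missing and optional=False.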
self._match_texts(("TRANSACTION", "WORK")) 5728 5729 if self._match_text_seq("TO"): 5730 self._match_text_seq("SAVEPOINT") 5731 savepoint = self._parse_id_var() 5732 5733 if self._match(TokenType.AND): 5734 chain = not self._match_text_seq("NO") 5735 self._match_text_seq("CHAIN") 5736 5737 if is_rollback: 5738 return self.expression(exp.Rollback, savepoint=savepoint) 5739 5740 return self.expression(exp.Commit, chain=chain) 5741 5742 def _parse_refresh(self) -> exp.Refresh: 5743 self._match(TokenType.TABLE) 5744 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5745 5746 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5747 if not self._match_text_seq("ADD"): 5748 return None 5749 5750 self._match(TokenType.COLUMN) 5751 exists_column = self._parse_exists(not_=True) 5752 expression = self._parse_field_def() 5753 5754 if expression: 5755 expression.set("exists", exists_column) 5756 5757 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5758 if self._match_texts(("FIRST", "AFTER")): 5759 position = self._prev.text 5760 column_position = self.expression( 5761 exp.ColumnPosition, this=self._parse_column(), position=position 5762 ) 5763 expression.set("position", column_position) 5764 5765 return expression 5766 5767 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5768 drop = self._match(TokenType.DROP) and self._parse_drop() 5769 if drop and not isinstance(drop, exp.Command): 5770 drop.set("kind", drop.args.get("kind", "COLUMN")) 5771 return drop 5772 5773 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5774 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5775 return self.expression( 5776 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5777 ) 5778 5779 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5780 index = self._index - 1 5781 5782 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5783 return self._parse_csv( 5784 lambda: self.expression( 5785 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5786 ) 5787 ) 5788 5789 self._retreat(index) 5790 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5791 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5792 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5793 5794 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5795 self._match(TokenType.COLUMN) 5796 column = self._parse_field(any_token=True) 5797 5798 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5799 return self.expression(exp.AlterColumn, this=column, drop=True) 5800 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5801 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5802 if self._match(TokenType.COMMENT): 5803 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5804 5805 self._match_text_seq("SET", "DATA") 5806 self._match_text_seq("TYPE") 5807 return self.expression( 5808 exp.AlterColumn, 5809 this=column, 5810 dtype=self._parse_types(), 5811 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5812 using=self._match(TokenType.USING) and self._parse_conjunction(), 5813 ) 5814 5815 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5816 index = self._index - 1 5817 5818 partition_exists = self._parse_exists() 5819 if 

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
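
    # Illustrative sketch, not part of the library: _parse_when_matched turns each
    # WHEN branch of a MERGE into an exp.When, e.g.
    #
    #     MERGE INTO t USING s ON t.id = s.id
    #     WHEN MATCHED THEN UPDATE SET t.v = s.v
    #     WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
    #
    # produces two exp.When nodes (matched=True with an exp.Update "then", and
    # matched=False with an exp.Insert "then") attached to the exp.Merge.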

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])
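
    # Illustrative sketch, not part of the library: _parse_var_from_options walks a
    # small keyword table, so given options shaped like
    #
    #     {"ISOLATION": (("LEVEL", "READ", "COMMITTED"), ("LEVEL", "SERIALIZABLE"))}
    #
    # the input "ISOLATION LEVEL READ COMMITTED" is folded into a single
    # exp.Var("ISOLATION LEVEL READ COMMITTED"), while an unknown continuation
    # either raises or retreats depending on raise_unmatched.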
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None
    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
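The private _parse_* methods above are reached through the public entry points. A minimal sketch using the sqlglot.parse_one helper; the exact node shapes can vary between sqlglot versions and dialects:

import sqlglot
from sqlglot import exp

# MERGE is routed through _parse_merge and _parse_when_matched
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)
print([w.args.get("matched") for w in merge.expressions])  # one flag per WHEN branch

# TRUNCATE is routed through _parse_truncate_table
truncate = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
assert isinstance(truncate, exp.TruncateTable)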
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
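A short usage sketch: constructing a Parser directly with explicit error settings. Most callers go through sqlglot.parse or sqlglot.parse_one instead of instantiating the class themselves.

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to 5 errors and raise them together instead of failing fast
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)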
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
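A minimal sketch of driving parse() by hand with tokens produced by the Tokenizer; sqlglot.parse wraps this same flow:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql=sql)
print(len(trees))  # 2, one syntax tree per statement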
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
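For example, parsing a token list directly into a Column node. This assumes exp.Column is registered in EXPRESSION_PARSERS, as it is in current releases:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "a.b.c"
tokens = Tokenizer().tokenize(sql)
column = Parser().parse_into(exp.Column, tokens, sql=sql)[0]
assert isinstance(column, exp.Column)

Passing a tuple such as (exp.From, exp.Column) would try each type in turn, raising only if all of them fail.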
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
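check_errors runs at the end of an internal parsing pass, so its effect is easiest to observe through parse(). A sketch with ErrorLevel.RAISE, where the accumulated errors are merged into a single ParseError:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT foo( FROM bar"  # unbalanced parenthesis
try:
    Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    print(len(e.errors))  # all recorded errors; the message itself is capped at max_errors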
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
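The structured fields built here survive on the raised exception, which makes programmatic error reporting possible. A small sketch under the default ErrorLevel.IMMEDIATE:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT foo( FROM bar"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["highlight"])  # location plus the offending text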
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
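In practice, expression() is what dialect-specific parser methods call to build nodes, since it attaches pending comments and validates in one step. A hedged sketch; MyParser and _parse_greet are illustrative names, not part of sqlglot:

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_greet(self):
        # instantiate exp.Anonymous, attach any pending comments, then validate
        return self.expression(exp.Anonymous, this="GREET", expressions=[])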
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
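A sketch of the validation path in isolation; exp.Like is a convenient target because both of its arguments are mandatory (exact error wording may differ between versions):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # the default ErrorLevel.IMMEDIATE raises on the first problem
try:
    parser.validate_expression(exp.Like())  # "this" and "expression" are missing
except ParseError as e:
    print(e.errors[0]["description"])  # e.g. Required keyword: 'this' missing for ...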