sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 return lambda self, this: self._parse_escape( 47 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 48 ) 49 50 51def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 
62 63 64def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _builder(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _builder 75 76 77class _Parser(type): 78 def __new__(cls, clsname, bases, attrs): 79 klass = super().__new__(cls, clsname, bases, attrs) 80 81 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 82 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 83 84 return klass 85 86 87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: The amount of context to capture from a query string when displaying 95 the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 
99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": build_like, 123 "LOG": build_logarithm, 124 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 125 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 126 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 127 "TIME_TO_TIME_STR": lambda args: exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 132 this=exp.Cast( 133 this=seq_get(args, 0), 134 to=exp.DataType(this=exp.DataType.Type.TEXT), 135 ), 136 start=exp.Literal.number(1), 137 length=exp.Literal.number(10), 138 ), 139 "VAR_MAP": build_var_map, 140 } 141 142 NO_PAREN_FUNCTIONS = { 143 TokenType.CURRENT_DATE: exp.CurrentDate, 144 TokenType.CURRENT_DATETIME: exp.CurrentDate, 145 TokenType.CURRENT_TIME: exp.CurrentTime, 146 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 147 TokenType.CURRENT_USER: exp.CurrentUser, 148 } 149 150 STRUCT_TYPE_TOKENS = { 151 TokenType.NESTED, 152 TokenType.OBJECT, 153 
TokenType.STRUCT, 154 } 155 156 NESTED_TYPE_TOKENS = { 157 TokenType.ARRAY, 158 TokenType.LOWCARDINALITY, 159 TokenType.MAP, 160 TokenType.NULLABLE, 161 *STRUCT_TYPE_TOKENS, 162 } 163 164 ENUM_TYPE_TOKENS = { 165 TokenType.ENUM, 166 TokenType.ENUM8, 167 TokenType.ENUM16, 168 } 169 170 AGGREGATE_TYPE_TOKENS = { 171 TokenType.AGGREGATEFUNCTION, 172 TokenType.SIMPLEAGGREGATEFUNCTION, 173 } 174 175 TYPE_TOKENS = { 176 TokenType.BIT, 177 TokenType.BOOLEAN, 178 TokenType.TINYINT, 179 TokenType.UTINYINT, 180 TokenType.SMALLINT, 181 TokenType.USMALLINT, 182 TokenType.INT, 183 TokenType.UINT, 184 TokenType.BIGINT, 185 TokenType.UBIGINT, 186 TokenType.INT128, 187 TokenType.UINT128, 188 TokenType.INT256, 189 TokenType.UINT256, 190 TokenType.MEDIUMINT, 191 TokenType.UMEDIUMINT, 192 TokenType.FIXEDSTRING, 193 TokenType.FLOAT, 194 TokenType.DOUBLE, 195 TokenType.CHAR, 196 TokenType.NCHAR, 197 TokenType.VARCHAR, 198 TokenType.NVARCHAR, 199 TokenType.BPCHAR, 200 TokenType.TEXT, 201 TokenType.MEDIUMTEXT, 202 TokenType.LONGTEXT, 203 TokenType.MEDIUMBLOB, 204 TokenType.LONGBLOB, 205 TokenType.BINARY, 206 TokenType.VARBINARY, 207 TokenType.JSON, 208 TokenType.JSONB, 209 TokenType.INTERVAL, 210 TokenType.TINYBLOB, 211 TokenType.TINYTEXT, 212 TokenType.TIME, 213 TokenType.TIMETZ, 214 TokenType.TIMESTAMP, 215 TokenType.TIMESTAMP_S, 216 TokenType.TIMESTAMP_MS, 217 TokenType.TIMESTAMP_NS, 218 TokenType.TIMESTAMPTZ, 219 TokenType.TIMESTAMPLTZ, 220 TokenType.DATETIME, 221 TokenType.DATETIME64, 222 TokenType.DATE, 223 TokenType.DATE32, 224 TokenType.INT4RANGE, 225 TokenType.INT4MULTIRANGE, 226 TokenType.INT8RANGE, 227 TokenType.INT8MULTIRANGE, 228 TokenType.NUMRANGE, 229 TokenType.NUMMULTIRANGE, 230 TokenType.TSRANGE, 231 TokenType.TSMULTIRANGE, 232 TokenType.TSTZRANGE, 233 TokenType.TSTZMULTIRANGE, 234 TokenType.DATERANGE, 235 TokenType.DATEMULTIRANGE, 236 TokenType.DECIMAL, 237 TokenType.UDECIMAL, 238 TokenType.BIGDECIMAL, 239 TokenType.UUID, 240 TokenType.GEOGRAPHY, 241 TokenType.GEOMETRY, 
242 TokenType.HLLSKETCH, 243 TokenType.HSTORE, 244 TokenType.PSEUDO_TYPE, 245 TokenType.SUPER, 246 TokenType.SERIAL, 247 TokenType.SMALLSERIAL, 248 TokenType.BIGSERIAL, 249 TokenType.XML, 250 TokenType.YEAR, 251 TokenType.UNIQUEIDENTIFIER, 252 TokenType.USERDEFINED, 253 TokenType.MONEY, 254 TokenType.SMALLMONEY, 255 TokenType.ROWVERSION, 256 TokenType.IMAGE, 257 TokenType.VARIANT, 258 TokenType.OBJECT, 259 TokenType.OBJECT_IDENTIFIER, 260 TokenType.INET, 261 TokenType.IPADDRESS, 262 TokenType.IPPREFIX, 263 TokenType.IPV4, 264 TokenType.IPV6, 265 TokenType.UNKNOWN, 266 TokenType.NULL, 267 TokenType.NAME, 268 *ENUM_TYPE_TOKENS, 269 *NESTED_TYPE_TOKENS, 270 *AGGREGATE_TYPE_TOKENS, 271 } 272 273 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 274 TokenType.BIGINT: TokenType.UBIGINT, 275 TokenType.INT: TokenType.UINT, 276 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 277 TokenType.SMALLINT: TokenType.USMALLINT, 278 TokenType.TINYINT: TokenType.UTINYINT, 279 TokenType.DECIMAL: TokenType.UDECIMAL, 280 } 281 282 SUBQUERY_PREDICATES = { 283 TokenType.ANY: exp.Any, 284 TokenType.ALL: exp.All, 285 TokenType.EXISTS: exp.Exists, 286 TokenType.SOME: exp.Any, 287 } 288 289 RESERVED_TOKENS = { 290 *Tokenizer.SINGLE_TOKENS.values(), 291 TokenType.SELECT, 292 } 293 294 DB_CREATABLES = { 295 TokenType.DATABASE, 296 TokenType.SCHEMA, 297 TokenType.TABLE, 298 TokenType.VIEW, 299 TokenType.MODEL, 300 TokenType.DICTIONARY, 301 TokenType.SEQUENCE, 302 TokenType.STORAGE_INTEGRATION, 303 } 304 305 CREATABLES = { 306 TokenType.COLUMN, 307 TokenType.CONSTRAINT, 308 TokenType.FUNCTION, 309 TokenType.INDEX, 310 TokenType.PROCEDURE, 311 TokenType.FOREIGN_KEY, 312 *DB_CREATABLES, 313 } 314 315 # Tokens that can represent identifiers 316 ID_VAR_TOKENS = { 317 TokenType.VAR, 318 TokenType.ANTI, 319 TokenType.APPLY, 320 TokenType.ASC, 321 TokenType.ASOF, 322 TokenType.AUTO_INCREMENT, 323 TokenType.BEGIN, 324 TokenType.BPCHAR, 325 TokenType.CACHE, 326 TokenType.CASE, 327 TokenType.COLLATE, 328 TokenType.COMMAND, 329 
TokenType.COMMENT, 330 TokenType.COMMIT, 331 TokenType.CONSTRAINT, 332 TokenType.DEFAULT, 333 TokenType.DELETE, 334 TokenType.DESC, 335 TokenType.DESCRIBE, 336 TokenType.DICTIONARY, 337 TokenType.DIV, 338 TokenType.END, 339 TokenType.EXECUTE, 340 TokenType.ESCAPE, 341 TokenType.FALSE, 342 TokenType.FIRST, 343 TokenType.FILTER, 344 TokenType.FINAL, 345 TokenType.FORMAT, 346 TokenType.FULL, 347 TokenType.IS, 348 TokenType.ISNULL, 349 TokenType.INTERVAL, 350 TokenType.KEEP, 351 TokenType.KILL, 352 TokenType.LEFT, 353 TokenType.LOAD, 354 TokenType.MERGE, 355 TokenType.NATURAL, 356 TokenType.NEXT, 357 TokenType.OFFSET, 358 TokenType.OPERATOR, 359 TokenType.ORDINALITY, 360 TokenType.OVERLAPS, 361 TokenType.OVERWRITE, 362 TokenType.PARTITION, 363 TokenType.PERCENT, 364 TokenType.PIVOT, 365 TokenType.PRAGMA, 366 TokenType.RANGE, 367 TokenType.RECURSIVE, 368 TokenType.REFERENCES, 369 TokenType.REFRESH, 370 TokenType.REPLACE, 371 TokenType.RIGHT, 372 TokenType.ROW, 373 TokenType.ROWS, 374 TokenType.SEMI, 375 TokenType.SET, 376 TokenType.SETTINGS, 377 TokenType.SHOW, 378 TokenType.TEMPORARY, 379 TokenType.TOP, 380 TokenType.TRUE, 381 TokenType.TRUNCATE, 382 TokenType.UNIQUE, 383 TokenType.UNPIVOT, 384 TokenType.UPDATE, 385 TokenType.USE, 386 TokenType.VOLATILE, 387 TokenType.WINDOW, 388 *CREATABLES, 389 *SUBQUERY_PREDICATES, 390 *TYPE_TOKENS, 391 *NO_PAREN_FUNCTIONS, 392 } 393 394 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 395 396 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 397 TokenType.ANTI, 398 TokenType.APPLY, 399 TokenType.ASOF, 400 TokenType.FULL, 401 TokenType.LEFT, 402 TokenType.LOCK, 403 TokenType.NATURAL, 404 TokenType.OFFSET, 405 TokenType.RIGHT, 406 TokenType.SEMI, 407 TokenType.WINDOW, 408 } 409 410 ALIAS_TOKENS = ID_VAR_TOKENS 411 412 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 413 414 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 415 416 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 417 418 FUNC_TOKENS = { 419 TokenType.COLLATE, 
420 TokenType.COMMAND, 421 TokenType.CURRENT_DATE, 422 TokenType.CURRENT_DATETIME, 423 TokenType.CURRENT_TIMESTAMP, 424 TokenType.CURRENT_TIME, 425 TokenType.CURRENT_USER, 426 TokenType.FILTER, 427 TokenType.FIRST, 428 TokenType.FORMAT, 429 TokenType.GLOB, 430 TokenType.IDENTIFIER, 431 TokenType.INDEX, 432 TokenType.ISNULL, 433 TokenType.ILIKE, 434 TokenType.INSERT, 435 TokenType.LIKE, 436 TokenType.MERGE, 437 TokenType.OFFSET, 438 TokenType.PRIMARY_KEY, 439 TokenType.RANGE, 440 TokenType.REPLACE, 441 TokenType.RLIKE, 442 TokenType.ROW, 443 TokenType.UNNEST, 444 TokenType.VAR, 445 TokenType.LEFT, 446 TokenType.RIGHT, 447 TokenType.SEQUENCE, 448 TokenType.DATE, 449 TokenType.DATETIME, 450 TokenType.TABLE, 451 TokenType.TIMESTAMP, 452 TokenType.TIMESTAMPTZ, 453 TokenType.TRUNCATE, 454 TokenType.WINDOW, 455 TokenType.XOR, 456 *TYPE_TOKENS, 457 *SUBQUERY_PREDICATES, 458 } 459 460 CONJUNCTION = { 461 TokenType.AND: exp.And, 462 TokenType.OR: exp.Or, 463 } 464 465 EQUALITY = { 466 TokenType.COLON_EQ: exp.PropertyEQ, 467 TokenType.EQ: exp.EQ, 468 TokenType.NEQ: exp.NEQ, 469 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 470 } 471 472 COMPARISON = { 473 TokenType.GT: exp.GT, 474 TokenType.GTE: exp.GTE, 475 TokenType.LT: exp.LT, 476 TokenType.LTE: exp.LTE, 477 } 478 479 BITWISE = { 480 TokenType.AMP: exp.BitwiseAnd, 481 TokenType.CARET: exp.BitwiseXor, 482 TokenType.PIPE: exp.BitwiseOr, 483 } 484 485 TERM = { 486 TokenType.DASH: exp.Sub, 487 TokenType.PLUS: exp.Add, 488 TokenType.MOD: exp.Mod, 489 TokenType.COLLATE: exp.Collate, 490 } 491 492 FACTOR = { 493 TokenType.DIV: exp.IntDiv, 494 TokenType.LR_ARROW: exp.Distance, 495 TokenType.SLASH: exp.Div, 496 TokenType.STAR: exp.Mul, 497 } 498 499 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 500 501 TIMES = { 502 TokenType.TIME, 503 TokenType.TIMETZ, 504 } 505 506 TIMESTAMPS = { 507 TokenType.TIMESTAMP, 508 TokenType.TIMESTAMPTZ, 509 TokenType.TIMESTAMPLTZ, 510 *TIMES, 511 } 512 513 SET_OPERATIONS = { 514 TokenType.UNION, 
515 TokenType.INTERSECT, 516 TokenType.EXCEPT, 517 } 518 519 JOIN_METHODS = { 520 TokenType.ASOF, 521 TokenType.NATURAL, 522 TokenType.POSITIONAL, 523 } 524 525 JOIN_SIDES = { 526 TokenType.LEFT, 527 TokenType.RIGHT, 528 TokenType.FULL, 529 } 530 531 JOIN_KINDS = { 532 TokenType.INNER, 533 TokenType.OUTER, 534 TokenType.CROSS, 535 TokenType.SEMI, 536 TokenType.ANTI, 537 } 538 539 JOIN_HINTS: t.Set[str] = set() 540 541 LAMBDAS = { 542 TokenType.ARROW: lambda self, expressions: self.expression( 543 exp.Lambda, 544 this=self._replace_lambda( 545 self._parse_conjunction(), 546 {node.name for node in expressions}, 547 ), 548 expressions=expressions, 549 ), 550 TokenType.FARROW: lambda self, expressions: self.expression( 551 exp.Kwarg, 552 this=exp.var(expressions[0].name), 553 expression=self._parse_conjunction(), 554 ), 555 } 556 557 COLUMN_OPERATORS = { 558 TokenType.DOT: None, 559 TokenType.DCOLON: lambda self, this, to: self.expression( 560 exp.Cast if self.STRICT_CAST else exp.TryCast, 561 this=this, 562 to=to, 563 ), 564 TokenType.ARROW: lambda self, this, path: self.expression( 565 exp.JSONExtract, 566 this=this, 567 expression=self.dialect.to_json_path(path), 568 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 569 ), 570 TokenType.DARROW: lambda self, this, path: self.expression( 571 exp.JSONExtractScalar, 572 this=this, 573 expression=self.dialect.to_json_path(path), 574 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 575 ), 576 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 577 exp.JSONBExtract, 578 this=this, 579 expression=path, 580 ), 581 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 582 exp.JSONBExtractScalar, 583 this=this, 584 expression=path, 585 ), 586 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 587 exp.JSONBContains, 588 this=this, 589 expression=key, 590 ), 591 } 592 593 EXPRESSION_PARSERS = { 594 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 595 
exp.Column: lambda self: self._parse_column(), 596 exp.Condition: lambda self: self._parse_conjunction(), 597 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 598 exp.Expression: lambda self: self._parse_expression(), 599 exp.From: lambda self: self._parse_from(), 600 exp.Group: lambda self: self._parse_group(), 601 exp.Having: lambda self: self._parse_having(), 602 exp.Identifier: lambda self: self._parse_id_var(), 603 exp.Join: lambda self: self._parse_join(), 604 exp.Lambda: lambda self: self._parse_lambda(), 605 exp.Lateral: lambda self: self._parse_lateral(), 606 exp.Limit: lambda self: self._parse_limit(), 607 exp.Offset: lambda self: self._parse_offset(), 608 exp.Order: lambda self: self._parse_order(), 609 exp.Ordered: lambda self: self._parse_ordered(), 610 exp.Properties: lambda self: self._parse_properties(), 611 exp.Qualify: lambda self: self._parse_qualify(), 612 exp.Returning: lambda self: self._parse_returning(), 613 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 614 exp.Table: lambda self: self._parse_table_parts(), 615 exp.TableAlias: lambda self: self._parse_table_alias(), 616 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 617 exp.Where: lambda self: self._parse_where(), 618 exp.Window: lambda self: self._parse_named_window(), 619 exp.With: lambda self: self._parse_with(), 620 "JOIN_TYPE": lambda self: self._parse_join_parts(), 621 } 622 623 STATEMENT_PARSERS = { 624 TokenType.ALTER: lambda self: self._parse_alter(), 625 TokenType.BEGIN: lambda self: self._parse_transaction(), 626 TokenType.CACHE: lambda self: self._parse_cache(), 627 TokenType.COMMENT: lambda self: self._parse_comment(), 628 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 629 TokenType.CREATE: lambda self: self._parse_create(), 630 TokenType.DELETE: lambda self: self._parse_delete(), 631 TokenType.DESC: lambda self: self._parse_describe(), 632 TokenType.DESCRIBE: lambda self: self._parse_describe(), 633 
TokenType.DROP: lambda self: self._parse_drop(), 634 TokenType.INSERT: lambda self: self._parse_insert(), 635 TokenType.KILL: lambda self: self._parse_kill(), 636 TokenType.LOAD: lambda self: self._parse_load(), 637 TokenType.MERGE: lambda self: self._parse_merge(), 638 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 639 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 640 TokenType.REFRESH: lambda self: self._parse_refresh(), 641 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 642 TokenType.SET: lambda self: self._parse_set(), 643 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 644 TokenType.UNCACHE: lambda self: self._parse_uncache(), 645 TokenType.UPDATE: lambda self: self._parse_update(), 646 TokenType.USE: lambda self: self.expression( 647 exp.Use, 648 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 649 this=self._parse_table(schema=False), 650 ), 651 } 652 653 UNARY_PARSERS = { 654 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 655 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 656 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 657 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 658 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 659 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 660 } 661 662 STRING_PARSERS = { 663 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 664 exp.RawString, this=token.text 665 ), 666 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 667 exp.National, this=token.text 668 ), 669 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 670 TokenType.STRING: lambda self, token: self.expression( 671 exp.Literal, this=token.text, 
is_string=True 672 ), 673 TokenType.UNICODE_STRING: lambda self, token: self.expression( 674 exp.UnicodeString, 675 this=token.text, 676 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 677 ), 678 } 679 680 NUMERIC_PARSERS = { 681 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 682 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 683 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 684 TokenType.NUMBER: lambda self, token: self.expression( 685 exp.Literal, this=token.text, is_string=False 686 ), 687 } 688 689 PRIMARY_PARSERS = { 690 **STRING_PARSERS, 691 **NUMERIC_PARSERS, 692 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 693 TokenType.NULL: lambda self, _: self.expression(exp.Null), 694 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 695 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 696 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 697 TokenType.STAR: lambda self, _: self.expression( 698 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 699 ), 700 } 701 702 PLACEHOLDER_PARSERS = { 703 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 704 TokenType.PARAMETER: lambda self: self._parse_parameter(), 705 TokenType.COLON: lambda self: ( 706 self.expression(exp.Placeholder, this=self._prev.text) 707 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 708 else None 709 ), 710 } 711 712 RANGE_PARSERS = { 713 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 714 TokenType.GLOB: binary_range_parser(exp.Glob), 715 TokenType.ILIKE: binary_range_parser(exp.ILike), 716 TokenType.IN: lambda self, this: self._parse_in(this), 717 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 718 TokenType.IS: lambda self, this: self._parse_is(this), 719 
TokenType.LIKE: binary_range_parser(exp.Like), 720 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 721 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 722 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 723 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 724 } 725 726 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 727 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 728 "AUTO": lambda self: self._parse_auto_property(), 729 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 730 "BACKUP": lambda self: self.expression( 731 exp.BackupProperty, this=self._parse_var(any_token=True) 732 ), 733 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 734 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 735 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHECKSUM": lambda self: self._parse_checksum(), 737 "CLUSTER BY": lambda self: self._parse_cluster(), 738 "CLUSTERED": lambda self: self._parse_clustered_by(), 739 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 740 exp.CollateProperty, **kwargs 741 ), 742 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 743 "CONTAINS": lambda self: self._parse_contains_property(), 744 "COPY": lambda self: self._parse_copy_property(), 745 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 746 "DEFINER": lambda self: self._parse_definer(), 747 "DETERMINISTIC": lambda self: self.expression( 748 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 749 ), 750 "DISTKEY": lambda self: self._parse_distkey(), 751 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 752 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 753 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 754 "EXTERNAL": lambda self: 
self.expression(exp.ExternalProperty), 755 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 756 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 757 "FREESPACE": lambda self: self._parse_freespace(), 758 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 759 "HEAP": lambda self: self.expression(exp.HeapProperty), 760 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 761 "IMMUTABLE": lambda self: self.expression( 762 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 763 ), 764 "INHERITS": lambda self: self.expression( 765 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 766 ), 767 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 768 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 769 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 770 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 771 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 772 "LIKE": lambda self: self._parse_create_like(), 773 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 774 "LOCK": lambda self: self._parse_locking(), 775 "LOCKING": lambda self: self._parse_locking(), 776 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 777 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 778 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 779 "MODIFIES": lambda self: self._parse_modifies_property(), 780 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 781 "NO": lambda self: self._parse_no_property(), 782 "ON": lambda self: self._parse_on_property(), 783 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 784 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 785 "PARTITION": lambda self: self._parse_partitioned_of(), 
786 "PARTITION BY": lambda self: self._parse_partitioned_by(), 787 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 789 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 790 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 791 "READS": lambda self: self._parse_reads_property(), 792 "REMOTE": lambda self: self._parse_remote_with_connection(), 793 "RETURNS": lambda self: self._parse_returns(), 794 "ROW": lambda self: self._parse_row(), 795 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 796 "SAMPLE": lambda self: self.expression( 797 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 798 ), 799 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 800 "SETTINGS": lambda self: self.expression( 801 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 802 ), 803 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 804 "SORTKEY": lambda self: self._parse_sortkey(), 805 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 806 "STABLE": lambda self: self.expression( 807 exp.StabilityProperty, this=exp.Literal.string("STABLE") 808 ), 809 "STORED": lambda self: self._parse_stored(), 810 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 811 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 812 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 813 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 814 "TO": lambda self: self._parse_to_table(), 815 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 816 "TRANSFORM": lambda self: self.expression( 817 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 818 ), 819 "TTL": lambda self: self._parse_ttl(), 820 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 
821 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 822 "VOLATILE": lambda self: self._parse_volatile_property(), 823 "WITH": lambda self: self._parse_with_property(), 824 } 825 826 CONSTRAINT_PARSERS = { 827 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 828 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 829 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 830 "CHARACTER SET": lambda self: self.expression( 831 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 832 ), 833 "CHECK": lambda self: self.expression( 834 exp.CheckColumnConstraint, 835 this=self._parse_wrapped(self._parse_conjunction), 836 enforced=self._match_text_seq("ENFORCED"), 837 ), 838 "COLLATE": lambda self: self.expression( 839 exp.CollateColumnConstraint, this=self._parse_var() 840 ), 841 "COMMENT": lambda self: self.expression( 842 exp.CommentColumnConstraint, this=self._parse_string() 843 ), 844 "COMPRESS": lambda self: self._parse_compress(), 845 "CLUSTERED": lambda self: self.expression( 846 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 847 ), 848 "NONCLUSTERED": lambda self: self.expression( 849 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 850 ), 851 "DEFAULT": lambda self: self.expression( 852 exp.DefaultColumnConstraint, this=self._parse_bitwise() 853 ), 854 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 855 "EXCLUDE": lambda self: self.expression( 856 exp.ExcludeColumnConstraint, this=self._parse_index_params() 857 ), 858 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 859 "FORMAT": lambda self: self.expression( 860 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 861 ), 862 "GENERATED": lambda self: self._parse_generated_as_identity(), 863 "IDENTITY": lambda self: self._parse_auto_increment(), 864 "INLINE": lambda self: self._parse_inline(), 865 
"LIKE": lambda self: self._parse_create_like(), 866 "NOT": lambda self: self._parse_not_constraint(), 867 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 868 "ON": lambda self: ( 869 self._match(TokenType.UPDATE) 870 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 871 ) 872 or self.expression(exp.OnProperty, this=self._parse_id_var()), 873 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 874 "PERIOD": lambda self: self._parse_period_for_system_time(), 875 "PRIMARY KEY": lambda self: self._parse_primary_key(), 876 "REFERENCES": lambda self: self._parse_references(match=False), 877 "TITLE": lambda self: self.expression( 878 exp.TitleColumnConstraint, this=self._parse_var_or_string() 879 ), 880 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 881 "UNIQUE": lambda self: self._parse_unique(), 882 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 883 "WITH": lambda self: self.expression( 884 exp.Properties, expressions=self._parse_wrapped_properties() 885 ), 886 } 887 888 ALTER_PARSERS = { 889 "ADD": lambda self: self._parse_alter_table_add(), 890 "ALTER": lambda self: self._parse_alter_table_alter(), 891 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 892 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 893 "DROP": lambda self: self._parse_alter_table_drop(), 894 "RENAME": lambda self: self._parse_alter_table_rename(), 895 } 896 897 SCHEMA_UNNAMED_CONSTRAINTS = { 898 "CHECK", 899 "EXCLUDE", 900 "FOREIGN KEY", 901 "LIKE", 902 "PERIOD", 903 "PRIMARY KEY", 904 "UNIQUE", 905 } 906 907 NO_PAREN_FUNCTION_PARSERS = { 908 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 909 "CASE": lambda self: self._parse_case(), 910 "IF": lambda self: self._parse_if(), 911 "NEXT": lambda self: self._parse_next_value_for(), 912 } 913 914 
INVALID_FUNC_NAME_TOKENS = { 915 TokenType.IDENTIFIER, 916 TokenType.STRING, 917 } 918 919 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 920 921 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 922 923 FUNCTION_PARSERS = { 924 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 925 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 926 "DECODE": lambda self: self._parse_decode(), 927 "EXTRACT": lambda self: self._parse_extract(), 928 "JSON_OBJECT": lambda self: self._parse_json_object(), 929 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 930 "JSON_TABLE": lambda self: self._parse_json_table(), 931 "MATCH": lambda self: self._parse_match_against(), 932 "OPENJSON": lambda self: self._parse_open_json(), 933 "POSITION": lambda self: self._parse_position(), 934 "PREDICT": lambda self: self._parse_predict(), 935 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 936 "STRING_AGG": lambda self: self._parse_string_agg(), 937 "SUBSTRING": lambda self: self._parse_substring(), 938 "TRIM": lambda self: self._parse_trim(), 939 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 940 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 941 } 942 943 QUERY_MODIFIER_PARSERS = { 944 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 945 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 946 TokenType.WHERE: lambda self: ("where", self._parse_where()), 947 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 948 TokenType.HAVING: lambda self: ("having", self._parse_having()), 949 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 950 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 951 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 952 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 953 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 954 
TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 955 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 956 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 957 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 958 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 959 TokenType.CLUSTER_BY: lambda self: ( 960 "cluster", 961 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 962 ), 963 TokenType.DISTRIBUTE_BY: lambda self: ( 964 "distribute", 965 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 966 ), 967 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 968 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 969 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 970 } 971 972 SET_PARSERS = { 973 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 974 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 975 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 976 "TRANSACTION": lambda self: self._parse_set_transaction(), 977 } 978 979 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 980 981 TYPE_LITERAL_PARSERS = { 982 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 983 } 984 985 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 986 987 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 988 989 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 990 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 991 "ISOLATION": ( 992 ("LEVEL", "REPEATABLE", "READ"), 993 ("LEVEL", "READ", "COMMITTED"), 994 ("LEVEL", "READ", "UNCOMITTED"), 995 ("LEVEL", "SERIALIZABLE"), 996 ), 997 "READ": ("WRITE", "ONLY"), 998 } 999 1000 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1001 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", 
"UPDATE"), tuple() 1002 ) 1003 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1004 1005 CREATE_SEQUENCE: OPTIONS_TYPE = { 1006 "SCALE": ("EXTEND", "NOEXTEND"), 1007 "SHARD": ("EXTEND", "NOEXTEND"), 1008 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1009 **dict.fromkeys( 1010 ( 1011 "SESSION", 1012 "GLOBAL", 1013 "KEEP", 1014 "NOKEEP", 1015 "ORDER", 1016 "NOORDER", 1017 "NOCACHE", 1018 "CYCLE", 1019 "NOCYCLE", 1020 "NOMINVALUE", 1021 "NOMAXVALUE", 1022 "NOSCALE", 1023 "NOSHARD", 1024 ), 1025 tuple(), 1026 ), 1027 } 1028 1029 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1030 1031 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1032 1033 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1034 1035 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1036 1037 CLONE_KEYWORDS = {"CLONE", "COPY"} 1038 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1039 1040 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1041 1042 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1043 1044 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1045 1046 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1047 1048 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1049 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1050 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1051 1052 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1053 1054 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1055 1056 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1057 1058 DISTINCT_TOKENS = {TokenType.DISTINCT} 1059 1060 NULL_TOKENS = {TokenType.NULL} 1061 1062 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1063 1064 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 
    # Whether CAST should raise on failure (vs TRY_CAST semantics)
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported here to avoid a circular import at module load time
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so this instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to, then try the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token list on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The escape sequences underline the offending SQL fragment in terminals
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no comments were given, attach any pending comments from the previous token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Moves any buffered comments from the previous token onto `expression`."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the SQL text
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward, refreshing _curr/_next/_prev/_prev_comments."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the token cursor back (or forward) to the given absolute index."""
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Consumes the remainder of the statement verbatim as an opaque Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure surfaces as a ParseError we can catch here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque Command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a table reference into a TO TABLE property."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a MergeTree TTL clause, including its actions, WHERE/GROUP BY and SET aggregates."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: a registered statement/command, or a bare expression/select."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a Command for unknown target kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS, returning a truthy value only when the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement (function/procedure, index, or DB-level creatable)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip TABLE so FUNCTION is matched as the creatable
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different syntactic locations
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            # Unconsumed tokens remain: the statement wasn't fully understood
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options; returns None when no option was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parses a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property: registered parser, special forms, or a key = value pair."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key = value property; backtrack and try sequence options instead
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS, including Hive's INPUTFORMAT/OUTPUTFORMAT variant."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses an optional '=' / 'AS' followed by a field, into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties into a Properties node; returns None if none found."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is the table property;
        # otherwise it describes function stability
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parses SYSTEM_VERSIONING [= ON] with optional HISTORY_TABLE/DATA_CONSISTENCY_CHECK."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the property (or property list) that follows a WITH keyword."""
        if self._match(TokenType.L_PAREN, advance=False):
1810 return self._parse_wrapped_properties() 1811 1812 if self._match_text_seq("JOURNAL"): 1813 return self._parse_withjournaltable() 1814 1815 if self._match_texts(self.VIEW_ATTRIBUTES): 1816 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1817 1818 if self._match_text_seq("DATA"): 1819 return self._parse_withdata(no=False) 1820 elif self._match_text_seq("NO", "DATA"): 1821 return self._parse_withdata(no=True) 1822 1823 if not self._next: 1824 return None 1825 1826 return self._parse_withisolatedloading() 1827 1828 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1829 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1830 self._match(TokenType.EQ) 1831 1832 user = self._parse_id_var() 1833 self._match(TokenType.PARAMETER) 1834 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1835 1836 if not user or not host: 1837 return None 1838 1839 return exp.DefinerProperty(this=f"{user}@{host}") 1840 1841 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1842 self._match(TokenType.TABLE) 1843 self._match(TokenType.EQ) 1844 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1845 1846 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1847 return self.expression(exp.LogProperty, no=no) 1848 1849 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1850 return self.expression(exp.JournalProperty, **kwargs) 1851 1852 def _parse_checksum(self) -> exp.ChecksumProperty: 1853 self._match(TokenType.EQ) 1854 1855 on = None 1856 if self._match(TokenType.ON): 1857 on = True 1858 elif self._match_text_seq("OFF"): 1859 on = False 1860 1861 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1862 1863 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1864 return self.expression( 1865 exp.Cluster, 1866 expressions=( 1867 self._parse_wrapped_csv(self._parse_ordered) 1868 if wrapped 1869 else 
self._parse_csv(self._parse_ordered) 1870 ), 1871 ) 1872 1873 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1874 self._match_text_seq("BY") 1875 1876 self._match_l_paren() 1877 expressions = self._parse_csv(self._parse_column) 1878 self._match_r_paren() 1879 1880 if self._match_text_seq("SORTED", "BY"): 1881 self._match_l_paren() 1882 sorted_by = self._parse_csv(self._parse_ordered) 1883 self._match_r_paren() 1884 else: 1885 sorted_by = None 1886 1887 self._match(TokenType.INTO) 1888 buckets = self._parse_number() 1889 self._match_text_seq("BUCKETS") 1890 1891 return self.expression( 1892 exp.ClusteredByProperty, 1893 expressions=expressions, 1894 sorted_by=sorted_by, 1895 buckets=buckets, 1896 ) 1897 1898 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1899 if not self._match_text_seq("GRANTS"): 1900 self._retreat(self._index - 1) 1901 return None 1902 1903 return self.expression(exp.CopyGrantsProperty) 1904 1905 def _parse_freespace(self) -> exp.FreespaceProperty: 1906 self._match(TokenType.EQ) 1907 return self.expression( 1908 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1909 ) 1910 1911 def _parse_mergeblockratio( 1912 self, no: bool = False, default: bool = False 1913 ) -> exp.MergeBlockRatioProperty: 1914 if self._match(TokenType.EQ): 1915 return self.expression( 1916 exp.MergeBlockRatioProperty, 1917 this=self._parse_number(), 1918 percent=self._match(TokenType.PERCENT), 1919 ) 1920 1921 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1922 1923 def _parse_datablocksize( 1924 self, 1925 default: t.Optional[bool] = None, 1926 minimum: t.Optional[bool] = None, 1927 maximum: t.Optional[bool] = None, 1928 ) -> exp.DataBlocksizeProperty: 1929 self._match(TokenType.EQ) 1930 size = self._parse_number() 1931 1932 units = None 1933 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1934 units = self._prev.text 1935 1936 return self.expression( 1937 
            # (continuation of _parse_datablocksize's `return self.expression(`,
            # whose opening sits on the previous mangled source line)
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse a BLOCKCOMPRESSION property: ALWAYS/MANUAL/NEVER/DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse `[NO] [CONCURRENT] ISOLATED LOADING [target]`.

        Backtracks to the saved index and returns None when the mandatory
        ISOLATED LOADING keywords are absent.
        """
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: an optional kind (TABLE/VIEW/ROW/DATABASE),
        an optional object name, FOR or IN, a lock type, and optional OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # only DATABASE/TABLE/VIEW locks name an object; ROW locks do not
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse `PARTITION BY <exprs>`, returning [] when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: IN (...), FROM (...) TO (...), or
        WITH (MODULUS n, REMAINDER m). Raises a parse error on anything else."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are bound keywords, not ordinary expressions
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    #
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse `PARTITION OF <table> { DEFAULT | FOR VALUES <bound spec> }`."""
        if not self._match_text_seq("OF"):
            # not a PARTITION OF clause: give back the token the caller consumed
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the tail of WITH [NO] DATA [AND [NO] STATISTICS].

        `no` tells us whether the caller already consumed the NO before DATA.
        """
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # CONTAINS SQL; the CONTAINS keyword was consumed by the caller
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # MODIFIES SQL DATA; the MODIFIES keyword was consumed by the caller
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        # NO PRIMARY INDEX / NO SQL; anything else is left for other parsers
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT PRESERVE/DELETE ROWS, otherwise a generic ON <schema> property
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # READS SQL DATA; the READS keyword was consumed by the caller
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<id>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [{INCLUDING | EXCLUDING} <option>]*` in CREATE TABLE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # INCLUDING/EXCLUDING without an option name: abort the LIKE clause
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<ids>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name or string>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # (body of _parse_returns, whose `def` line ends the previous mangled line)
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> style
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # TABLE (<columns>) style
                value = self._parse_schema(exp.var("TABLE"))
        else:
            # scalar return type
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<creatable kind>] [EXTENDED | FORMATTED] <table> [properties]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED")) and self._prev.text.upper()
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, covering OVERWRITE/IGNORE, INSERT OR
        <alternative>, INSERT ... DIRECTORY, function targets, partitions,
        ON CONFLICT/DUPLICATE KEY, and RETURNING."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        # RETURNING may appear here or after the values; it is re-tried below
        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT [...] <action> or ON DUPLICATE KEY <action> clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # conflict target: ON CONSTRAINT <name> or a parenthesized key list
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None when RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # the ROW token was consumed by the caller; require FORMAT next
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)] or
        ROW FORMAT DELIMITED with its optional FIELDS/ESCAPED/COLLECTION/
        MAP KEYS/LINES/NULL sub-clauses.

        When `match_row` is True the leading ROW FORMAT tokens must be present.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            # (continuation of _parse_load; its opening lines end the previous
            # mangled source line)
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        # anything other than LOAD DATA falls back to an opaque Command node
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            # table list before FROM (multi-table delete)
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING may appear before or after WHERE depending on dialect
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET <assignments> [FROM ...] [WHERE ...]
        [RETURNING ...] [ORDER BY ...] [LIMIT ...]."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def
_parse_uncache(self) -> exp.Uncache: 2402 if not self._match(TokenType.TABLE): 2403 self.raise_error("Expecting TABLE after UNCACHE") 2404 2405 return self.expression( 2406 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2407 ) 2408 2409 def _parse_cache(self) -> exp.Cache: 2410 lazy = self._match_text_seq("LAZY") 2411 self._match(TokenType.TABLE) 2412 table = self._parse_table(schema=True) 2413 2414 options = [] 2415 if self._match_text_seq("OPTIONS"): 2416 self._match_l_paren() 2417 k = self._parse_string() 2418 self._match(TokenType.EQ) 2419 v = self._parse_string() 2420 options = [k, v] 2421 self._match_r_paren() 2422 2423 self._match(TokenType.ALIAS) 2424 return self.expression( 2425 exp.Cache, 2426 this=table, 2427 lazy=lazy, 2428 options=options, 2429 expression=self._parse_select(nested=True), 2430 ) 2431 2432 def _parse_partition(self) -> t.Optional[exp.Partition]: 2433 if not self._match(TokenType.PARTITION): 2434 return None 2435 2436 return self.expression( 2437 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2438 ) 2439 2440 def _parse_value(self) -> exp.Tuple: 2441 if self._match(TokenType.L_PAREN): 2442 expressions = self._parse_csv(self._parse_expression) 2443 self._match_r_paren() 2444 return self.expression(exp.Tuple, expressions=expressions) 2445 2446 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
2447 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2448 2449 def _parse_projections(self) -> t.List[exp.Expression]: 2450 return self._parse_expressions() 2451 2452 def _parse_select( 2453 self, 2454 nested: bool = False, 2455 table: bool = False, 2456 parse_subquery_alias: bool = True, 2457 parse_set_operation: bool = True, 2458 ) -> t.Optional[exp.Expression]: 2459 cte = self._parse_with() 2460 2461 if cte: 2462 this = self._parse_statement() 2463 2464 if not this: 2465 self.raise_error("Failed to parse any statement following CTE") 2466 return cte 2467 2468 if "with" in this.arg_types: 2469 this.set("with", cte) 2470 else: 2471 self.raise_error(f"{this.key} does not support CTE") 2472 this = cte 2473 2474 return this 2475 2476 # duckdb supports leading with FROM x 2477 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2478 2479 if self._match(TokenType.SELECT): 2480 comments = self._prev_comments 2481 2482 hint = self._parse_hint() 2483 all_ = self._match(TokenType.ALL) 2484 distinct = self._match_set(self.DISTINCT_TOKENS) 2485 2486 kind = ( 2487 self._match(TokenType.ALIAS) 2488 and self._match_texts(("STRUCT", "VALUE")) 2489 and self._prev.text.upper() 2490 ) 2491 2492 if distinct: 2493 distinct = self.expression( 2494 exp.Distinct, 2495 on=self._parse_value() if self._match(TokenType.ON) else None, 2496 ) 2497 2498 if all_ and distinct: 2499 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2500 2501 limit = self._parse_limit(top=True) 2502 projections = self._parse_projections() 2503 2504 this = self.expression( 2505 exp.Select, 2506 kind=kind, 2507 hint=hint, 2508 distinct=distinct, 2509 expressions=projections, 2510 limit=limit, 2511 ) 2512 this.comments = comments 2513 2514 into = self._parse_into() 2515 if into: 2516 this.set("into", into) 2517 2518 if not from_: 2519 from_ = self._parse_from() 2520 2521 if from_: 2522 this.set("from", from_) 2523 2524 this = 
self._parse_query_modifiers(this) 2525 elif (table or nested) and self._match(TokenType.L_PAREN): 2526 if self._match(TokenType.PIVOT): 2527 this = self._parse_simplified_pivot() 2528 elif self._match(TokenType.FROM): 2529 this = exp.select("*").from_( 2530 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2531 ) 2532 else: 2533 this = ( 2534 self._parse_table() 2535 if table 2536 else self._parse_select(nested=True, parse_set_operation=False) 2537 ) 2538 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2539 2540 self._match_r_paren() 2541 2542 # We return early here so that the UNION isn't attached to the subquery by the 2543 # following call to _parse_set_operations, but instead becomes the parent node 2544 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2545 elif self._match(TokenType.VALUES, advance=False): 2546 this = self._parse_derived_table_values() 2547 elif from_: 2548 this = exp.select("*").from_(from_.this, copy=False) 2549 else: 2550 this = None 2551 2552 if parse_set_operation: 2553 return self._parse_set_operations(this) 2554 return this 2555 2556 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2557 if not skip_with_token and not self._match(TokenType.WITH): 2558 return None 2559 2560 comments = self._prev_comments 2561 recursive = self._match(TokenType.RECURSIVE) 2562 2563 expressions = [] 2564 while True: 2565 expressions.append(self._parse_cte()) 2566 2567 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2568 break 2569 else: 2570 self._match(TokenType.WITH) 2571 2572 return self.expression( 2573 exp.With, comments=comments, expressions=expressions, recursive=recursive 2574 ) 2575 2576 def _parse_cte(self) -> exp.CTE: 2577 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2578 if not alias or not alias.this: 2579 self.raise_error("Expected CTE to have alias") 2580 2581 self._match(TokenType.ALIAS) 2582 2583 if self._match_text_seq("NOT", 
"MATERIALIZED"): 2584 materialized = False 2585 elif self._match_text_seq("MATERIALIZED"): 2586 materialized = True 2587 else: 2588 materialized = None 2589 2590 return self.expression( 2591 exp.CTE, 2592 this=self._parse_wrapped(self._parse_statement), 2593 alias=alias, 2594 materialized=materialized, 2595 ) 2596 2597 def _parse_table_alias( 2598 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2599 ) -> t.Optional[exp.TableAlias]: 2600 any_token = self._match(TokenType.ALIAS) 2601 alias = ( 2602 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2603 or self._parse_string_as_identifier() 2604 ) 2605 2606 index = self._index 2607 if self._match(TokenType.L_PAREN): 2608 columns = self._parse_csv(self._parse_function_parameter) 2609 self._match_r_paren() if columns else self._retreat(index) 2610 else: 2611 columns = None 2612 2613 if not alias and not columns: 2614 return None 2615 2616 return self.expression(exp.TableAlias, this=alias, columns=columns) 2617 2618 def _parse_subquery( 2619 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2620 ) -> t.Optional[exp.Subquery]: 2621 if not this: 2622 return None 2623 2624 return self.expression( 2625 exp.Subquery, 2626 this=this, 2627 pivots=self._parse_pivots(), 2628 alias=self._parse_table_alias() if parse_alias else None, 2629 ) 2630 2631 def _implicit_unnests_to_explicit(self, this: E) -> E: 2632 from sqlglot.optimizer.normalize_identifiers import ( 2633 normalize_identifiers as _norm, 2634 ) 2635 2636 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2637 for i, join in enumerate(this.args.get("joins") or []): 2638 table = join.this 2639 normalized_table = table.copy() 2640 normalized_table.meta["maybe_column"] = True 2641 normalized_table = _norm(normalized_table, dialect=self.dialect) 2642 2643 if isinstance(table, exp.Table) and not join.args.get("on"): 2644 if normalized_table.parts[0].name in refs: 2645 
table_as_column = table.to_column() 2646 unnest = exp.Unnest(expressions=[table_as_column]) 2647 2648 # Table.to_column creates a parent Alias node that we want to convert to 2649 # a TableAlias and attach to the Unnest, so it matches the parser's output 2650 if isinstance(table.args.get("alias"), exp.TableAlias): 2651 table_as_column.replace(table_as_column.this) 2652 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2653 2654 table.replace(unnest) 2655 2656 refs.add(normalized_table.alias_or_name) 2657 2658 return this 2659 2660 def _parse_query_modifiers( 2661 self, this: t.Optional[exp.Expression] 2662 ) -> t.Optional[exp.Expression]: 2663 if isinstance(this, (exp.Query, exp.Table)): 2664 for join in self._parse_joins(): 2665 this.append("joins", join) 2666 for lateral in iter(self._parse_lateral, None): 2667 this.append("laterals", lateral) 2668 2669 while True: 2670 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2671 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2672 key, expression = parser(self) 2673 2674 if expression: 2675 this.set(key, expression) 2676 if key == "limit": 2677 offset = expression.args.pop("offset", None) 2678 2679 if offset: 2680 offset = exp.Offset(expression=offset) 2681 this.set("offset", offset) 2682 2683 limit_by_expressions = expression.expressions 2684 expression.set("expressions", None) 2685 offset.set("expressions", limit_by_expressions) 2686 continue 2687 break 2688 2689 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2690 this = self._implicit_unnests_to_explicit(this) 2691 2692 return this 2693 2694 def _parse_hint(self) -> t.Optional[exp.Hint]: 2695 if self._match(TokenType.HINT): 2696 hints = [] 2697 for hint in iter( 2698 lambda: self._parse_csv( 2699 lambda: self._parse_function() or self._parse_var(upper=True) 2700 ), 2701 [], 2702 ): 2703 hints.extend(hint) 2704 2705 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2706 
self.raise_error("Expected */ after HINT") 2707 2708 return self.expression(exp.Hint, expressions=hints) 2709 2710 return None 2711 2712 def _parse_into(self) -> t.Optional[exp.Into]: 2713 if not self._match(TokenType.INTO): 2714 return None 2715 2716 temp = self._match(TokenType.TEMPORARY) 2717 unlogged = self._match_text_seq("UNLOGGED") 2718 self._match(TokenType.TABLE) 2719 2720 return self.expression( 2721 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2722 ) 2723 2724 def _parse_from( 2725 self, joins: bool = False, skip_from_token: bool = False 2726 ) -> t.Optional[exp.From]: 2727 if not skip_from_token and not self._match(TokenType.FROM): 2728 return None 2729 2730 return self.expression( 2731 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2732 ) 2733 2734 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2735 if not self._match(TokenType.MATCH_RECOGNIZE): 2736 return None 2737 2738 self._match_l_paren() 2739 2740 partition = self._parse_partition_by() 2741 order = self._parse_order() 2742 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2743 2744 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2745 rows = exp.var("ONE ROW PER MATCH") 2746 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2747 text = "ALL ROWS PER MATCH" 2748 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2749 text += " SHOW EMPTY MATCHES" 2750 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2751 text += " OMIT EMPTY MATCHES" 2752 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2753 text += " WITH UNMATCHED ROWS" 2754 rows = exp.var(text) 2755 else: 2756 rows = None 2757 2758 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2759 text = "AFTER MATCH SKIP" 2760 if self._match_text_seq("PAST", "LAST", "ROW"): 2761 text += " PAST LAST ROW" 2762 elif self._match_text_seq("TO", "NEXT", "ROW"): 2763 text += " TO NEXT ROW" 2764 elif 
self._match_text_seq("TO", "FIRST"): 2765 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2766 elif self._match_text_seq("TO", "LAST"): 2767 text += f" TO LAST {self._advance_any().text}" # type: ignore 2768 after = exp.var(text) 2769 else: 2770 after = None 2771 2772 if self._match_text_seq("PATTERN"): 2773 self._match_l_paren() 2774 2775 if not self._curr: 2776 self.raise_error("Expecting )", self._curr) 2777 2778 paren = 1 2779 start = self._curr 2780 2781 while self._curr and paren > 0: 2782 if self._curr.token_type == TokenType.L_PAREN: 2783 paren += 1 2784 if self._curr.token_type == TokenType.R_PAREN: 2785 paren -= 1 2786 2787 end = self._prev 2788 self._advance() 2789 2790 if paren > 0: 2791 self.raise_error("Expecting )", self._curr) 2792 2793 pattern = exp.var(self._find_sql(start, end)) 2794 else: 2795 pattern = None 2796 2797 define = ( 2798 self._parse_csv(self._parse_name_as_expression) 2799 if self._match_text_seq("DEFINE") 2800 else None 2801 ) 2802 2803 self._match_r_paren() 2804 2805 return self.expression( 2806 exp.MatchRecognize, 2807 partition_by=partition, 2808 order=order, 2809 measures=measures, 2810 rows=rows, 2811 after=after, 2812 pattern=pattern, 2813 define=define, 2814 alias=self._parse_table_alias(), 2815 ) 2816 2817 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2818 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2819 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2820 cross_apply = False 2821 2822 if cross_apply is not None: 2823 this = self._parse_select(table=True) 2824 view = None 2825 outer = None 2826 elif self._match(TokenType.LATERAL): 2827 this = self._parse_select(table=True) 2828 view = self._match(TokenType.VIEW) 2829 outer = self._match(TokenType.OUTER) 2830 else: 2831 return None 2832 2833 if not this: 2834 this = ( 2835 self._parse_unnest() 2836 or self._parse_function() 2837 or self._parse_id_var(any_token=False) 2838 ) 2839 2840 while 
self._match(TokenType.DOT):
            # (continuation of _parse_lateral, whose opening lines precede this chunk)
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

        if view:
            # LATERAL VIEW <expr> <table> [AS col1, col2, ...] (Hive-style)
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Consume up to one token each from JOIN_METHODS / JOIN_SIDES / JOIN_KINDS
        # (e.g. NATURAL LEFT OUTER); any slot that doesn't match is None.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        # A bare comma in a FROM list is an implicit (cross-style) join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The method/side/kind tokens were not followed by JOIN: undo them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Speculatively parse nested joins so that `a JOIN b JOIN c ON ...`
            # attaches the trailing ON/USING to the right level; retreat if the
            # lookahead finds no ON/USING clause.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        # Postgres operator classes, e.g. CREATE INDEX ... (col text_pattern_ops).
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        # Trailing options of CREATE INDEX: USING <method>, column list, INCLUDE,
        # PARTITION BY, WITH (storage params), TABLESPACE and WHERE predicate.
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        # When `index` is given the name is already known and we expect
        # `ON <table>`; otherwise parse `[UNIQUE|PRIMARY|AMP] INDEX <name>`.
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while
self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    # e.g. USE INDEX FOR JOIN / FOR ORDER BY / FOR GROUP BY
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        # One dotted component of a table reference; functions are disallowed in
        # schema position (e.g. CREATE TABLE targets).
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # Parses [catalog.][db.]table, shifting parts left as dots are consumed.
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # BigQuery-style wildcard tables, e.g. `project.dataset.prefix*`
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # Only a db[.catalog] was expected (e.g. USE), so shift parts over.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A "table factor": lateral, unnest, VALUES, subquery or a plain table
        # reference, tried in that order.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            # Some dialects put TABLESAMPLE before the alias.
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots",
self._parse_pivots()) 3159 3160 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3161 table_sample = self._parse_table_sample() 3162 3163 if table_sample: 3164 table_sample.set("this", this) 3165 this = table_sample 3166 3167 if joins: 3168 for join in self._parse_joins(): 3169 this.append("joins", join) 3170 3171 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3172 this.set("ordinality", True) 3173 this.set("alias", self._parse_table_alias()) 3174 3175 return this 3176 3177 def _parse_version(self) -> t.Optional[exp.Version]: 3178 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3179 this = "TIMESTAMP" 3180 elif self._match(TokenType.VERSION_SNAPSHOT): 3181 this = "VERSION" 3182 else: 3183 return None 3184 3185 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3186 kind = self._prev.text.upper() 3187 start = self._parse_bitwise() 3188 self._match_texts(("TO", "AND")) 3189 end = self._parse_bitwise() 3190 expression: t.Optional[exp.Expression] = self.expression( 3191 exp.Tuple, expressions=[start, end] 3192 ) 3193 elif self._match_text_seq("CONTAINED", "IN"): 3194 kind = "CONTAINED IN" 3195 expression = self.expression( 3196 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3197 ) 3198 elif self._match(TokenType.ALL): 3199 kind = "ALL" 3200 expression = None 3201 else: 3202 self._match_text_seq("AS", "OF") 3203 kind = "AS OF" 3204 expression = self._parse_type() 3205 3206 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3207 3208 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3209 if not self._match(TokenType.UNNEST): 3210 return None 3211 3212 expressions = self._parse_wrapped_csv(self._parse_equality) 3213 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3214 3215 alias = self._parse_table_alias() if with_alias else None 3216 3217 if alias: 3218 if self.dialect.UNNEST_COLUMN_ONLY: 3219 if alias.args.get("columns"): 3220 self.raise_error("Unexpected extra column 
alias in unnest.") 3221 3222 alias.set("columns", [alias.this]) 3223 alias.set("this", None) 3224 3225 columns = alias.args.get("columns") or [] 3226 if offset and len(expressions) < len(columns): 3227 offset = columns.pop() 3228 3229 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3230 self._match(TokenType.ALIAS) 3231 offset = self._parse_id_var( 3232 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3233 ) or exp.to_identifier("offset") 3234 3235 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3236 3237 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3238 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3239 if not is_derived and not self._match_text_seq("VALUES"): 3240 return None 3241 3242 expressions = self._parse_csv(self._parse_value) 3243 alias = self._parse_table_alias() 3244 3245 if is_derived: 3246 self._match_r_paren() 3247 3248 return self.expression( 3249 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3250 ) 3251 3252 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3253 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3254 as_modifier and self._match_text_seq("USING", "SAMPLE") 3255 ): 3256 return None 3257 3258 bucket_numerator = None 3259 bucket_denominator = None 3260 bucket_field = None 3261 percent = None 3262 size = None 3263 seed = None 3264 3265 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3266 matched_l_paren = self._match(TokenType.L_PAREN) 3267 3268 if self.TABLESAMPLE_CSV: 3269 num = None 3270 expressions = self._parse_csv(self._parse_primary) 3271 else: 3272 expressions = None 3273 num = ( 3274 self._parse_factor() 3275 if self._match(TokenType.NUMBER, advance=False) 3276 else self._parse_primary() or self._parse_placeholder() 3277 ) 3278 3279 if self._match_text_seq("BUCKET"): 3280 bucket_numerator = self._parse_number() 3281 
self._match_text_seq("OUT", "OF") 3282 bucket_denominator = bucket_denominator = self._parse_number() 3283 self._match(TokenType.ON) 3284 bucket_field = self._parse_field() 3285 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3286 percent = num 3287 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3288 size = num 3289 else: 3290 percent = num 3291 3292 if matched_l_paren: 3293 self._match_r_paren() 3294 3295 if self._match(TokenType.L_PAREN): 3296 method = self._parse_var(upper=True) 3297 seed = self._match(TokenType.COMMA) and self._parse_number() 3298 self._match_r_paren() 3299 elif self._match_texts(("SEED", "REPEATABLE")): 3300 seed = self._parse_wrapped(self._parse_number) 3301 3302 return self.expression( 3303 exp.TableSample, 3304 expressions=expressions, 3305 method=method, 3306 bucket_numerator=bucket_numerator, 3307 bucket_denominator=bucket_denominator, 3308 bucket_field=bucket_field, 3309 percent=percent, 3310 size=size, 3311 seed=seed, 3312 ) 3313 3314 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3315 return list(iter(self._parse_pivot, None)) or None 3316 3317 def _parse_joins(self) -> t.Iterator[exp.Join]: 3318 return iter(self._parse_join, None) 3319 3320 # https://duckdb.org/docs/sql/statements/pivot 3321 def _parse_simplified_pivot(self) -> exp.Pivot: 3322 def _parse_on() -> t.Optional[exp.Expression]: 3323 this = self._parse_bitwise() 3324 return self._parse_in(this) if self._match(TokenType.IN) else this 3325 3326 this = self._parse_table() 3327 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3328 using = self._match(TokenType.USING) and self._parse_csv( 3329 lambda: self._parse_alias(self._parse_function()) 3330 ) 3331 group = self._parse_group() 3332 return self.expression( 3333 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3334 ) 3335 3336 def _parse_pivot_in(self) -> exp.In: 3337 def _parse_aliased_expression() -> 
t.Optional[exp.Expression]: 3338 this = self._parse_conjunction() 3339 3340 self._match(TokenType.ALIAS) 3341 alias = self._parse_field() 3342 if alias: 3343 return self.expression(exp.PivotAlias, this=this, alias=alias) 3344 3345 return this 3346 3347 value = self._parse_column() 3348 3349 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3350 self.raise_error("Expecting IN (") 3351 3352 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3353 3354 self._match_r_paren() 3355 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3356 3357 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3358 index = self._index 3359 include_nulls = None 3360 3361 if self._match(TokenType.PIVOT): 3362 unpivot = False 3363 elif self._match(TokenType.UNPIVOT): 3364 unpivot = True 3365 3366 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3367 if self._match_text_seq("INCLUDE", "NULLS"): 3368 include_nulls = True 3369 elif self._match_text_seq("EXCLUDE", "NULLS"): 3370 include_nulls = False 3371 else: 3372 return None 3373 3374 expressions = [] 3375 3376 if not self._match(TokenType.L_PAREN): 3377 self._retreat(index) 3378 return None 3379 3380 if unpivot: 3381 expressions = self._parse_csv(self._parse_column) 3382 else: 3383 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3384 3385 if not expressions: 3386 self.raise_error("Failed to parse PIVOT's aggregation list") 3387 3388 if not self._match(TokenType.FOR): 3389 self.raise_error("Expecting FOR") 3390 3391 field = self._parse_pivot_in() 3392 3393 self._match_r_paren() 3394 3395 pivot = self.expression( 3396 exp.Pivot, 3397 expressions=expressions, 3398 field=field, 3399 unpivot=unpivot, 3400 include_nulls=include_nulls, 3401 ) 3402 3403 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3404 pivot.set("alias", self._parse_table_alias()) 3405 3406 if not unpivot: 3407 names = 
self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3408 3409 columns: t.List[exp.Expression] = [] 3410 for fld in pivot.args["field"].expressions: 3411 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3412 for name in names: 3413 if self.PREFIXED_PIVOT_COLUMNS: 3414 name = f"{name}_{field_name}" if name else field_name 3415 else: 3416 name = f"{field_name}_{name}" if name else field_name 3417 3418 columns.append(exp.to_identifier(name)) 3419 3420 pivot.set("columns", columns) 3421 3422 return pivot 3423 3424 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3425 return [agg.alias for agg in aggregations] 3426 3427 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3428 if not skip_where_token and not self._match(TokenType.PREWHERE): 3429 return None 3430 3431 return self.expression( 3432 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3433 ) 3434 3435 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3436 if not skip_where_token and not self._match(TokenType.WHERE): 3437 return None 3438 3439 return self.expression( 3440 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3441 ) 3442 3443 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3444 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3445 return None 3446 3447 elements = defaultdict(list) 3448 3449 if self._match(TokenType.ALL): 3450 return self.expression(exp.Group, all=True) 3451 3452 while True: 3453 expressions = self._parse_csv(self._parse_conjunction) 3454 if expressions: 3455 elements["expressions"].extend(expressions) 3456 3457 grouping_sets = self._parse_grouping_sets() 3458 if grouping_sets: 3459 elements["grouping_sets"].extend(grouping_sets) 3460 3461 rollup = None 3462 cube = None 3463 totals = None 3464 3465 index = self._index 3466 with_ = 
self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP (boolean form) vs ROLLUP (col, ...) (list form)
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                # Note: overwrites the defaultdict list entry with a plain bool.
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH we consumed belongs to a following clause; undo it.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # Either a parenthesized tuple of columns or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        # Oracle hierarchical queries: START WITH ... CONNECT BY [NOCYCLE] ...
        # (START WITH may also follow CONNECT BY, handled below).
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # Temporarily register a parser for PRIOR while parsing the CONNECT BY
        # condition. NOTE(review): this mutates a shared class-level dict --
        # presumably safe for the single-threaded parsing flow; confirm if
        # parsers are ever used concurrently.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        # Parses `name AS expr` (alias-first form, e.g. ClickHouse INTERPOLATE).
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # `(asc and False)` is intentional, not dead code: it makes `desc` come
        # out as False when ASC was written explicitly, vs. None when no
        # direction was given -- presumably so downstream consumers can
        # distinguish an explicit ASC from an omitted direction.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When NULLS FIRST/LAST was not written, derive the effective null
        # ordering from the dialect's default.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ClickHouse ORDER BY ... WITH FILL [FROM ...] [TO ...] [STEP ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Handles LIMIT [offset,] count, T-SQL TOP (n), and ANSI FETCH FIRST.
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL `LIMIT offset, count`
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse LIMIT n BY expr, ...
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        # Row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        # each optionally with OF <tables> and NOWAIT / WAIT n / SKIP LOCKED.
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Left-associatively folds UNION/EXCEPT/INTERSECT chains onto `this`.
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL was written.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY/LIMIT) from the last
                # SELECT up to the union itself.
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Range-style predicates: BETWEEN, IN, LIKE etc. (via RANGE_PARSERS),
        # plus the Postgres ISNULL/NOTNULL shorthands and trailing IS tests.
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this =
self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL is represented as NOT (x IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Parses the tail of `x IS [NOT] {DISTINCT FROM y | NULL | TRUE | FALSE}`.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS test we understand; rewind to before IS.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        # Supports IN UNNEST(...), IN (subquery), IN (list) and IN field forms.
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # `INTERVAL` here was a column/keyword, not an interval literal.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        # Bitwise-level operators, plus dialect-dependent || (string concat),
        # ?? (coalesce), and << / >> shift pairs.
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                # Tag division with the dialect's semantics so the generator can
                # transpile integer/safe division correctly.
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' -- a typed literal, possibly with a
                # dialect-specific parser for that type.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name with no args was likely a column; re-parse.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        # Teradata SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier: it may spell a known type name.
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, type, ...)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all; rewind completely.
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit =
self._parse_var(upper=True) 4104 if unit: 4105 if self._match_text_seq("TO"): 4106 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4107 4108 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4109 else: 4110 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4111 4112 if maybe_func and check_func: 4113 index2 = self._index 4114 peek = self._parse_string() 4115 4116 if not peek: 4117 self._retreat(index) 4118 return None 4119 4120 self._retreat(index2) 4121 4122 if not this: 4123 if self._match_text_seq("UNSIGNED"): 4124 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4125 if not unsigned_type_token: 4126 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4127 4128 type_token = unsigned_type_token or type_token 4129 4130 this = exp.DataType( 4131 this=exp.DataType.Type[type_token.value], 4132 expressions=expressions, 4133 nested=nested, 4134 values=values, 4135 prefix=prefix, 4136 ) 4137 4138 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4139 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4140 4141 return this 4142 4143 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4144 index = self._index 4145 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4146 self._match(TokenType.COLON) 4147 column_def = self._parse_column_def(this) 4148 4149 if type_required and ( 4150 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4151 ): 4152 self._retreat(index) 4153 return self._parse_types() 4154 4155 return column_def 4156 4157 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4158 if not self._match_text_seq("AT", "TIME", "ZONE"): 4159 return this 4160 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4161 4162 def _parse_column(self) -> 
t.Optional[exp.Expression]: 4163 this = self._parse_column_reference() 4164 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4165 4166 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4167 this = self._parse_field() 4168 if ( 4169 not this 4170 and self._match(TokenType.VALUES, advance=False) 4171 and self.VALUES_FOLLOWED_BY_PAREN 4172 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4173 ): 4174 this = self._parse_id_var() 4175 4176 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4177 4178 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4179 this = self._parse_bracket(this) 4180 4181 while self._match_set(self.COLUMN_OPERATORS): 4182 op_token = self._prev.token_type 4183 op = self.COLUMN_OPERATORS.get(op_token) 4184 4185 if op_token == TokenType.DCOLON: 4186 field = self._parse_types() 4187 if not field: 4188 self.raise_error("Expected type") 4189 elif op and self._curr: 4190 field = self._parse_column_reference() 4191 else: 4192 field = self._parse_field(anonymous_func=True, any_token=True) 4193 4194 if isinstance(field, exp.Func) and this: 4195 # bigquery allows function calls like x.y.count(...) 4196 # SAFE.SUBSTR(...) 
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                # Re-root the accumulated column path as a Dot chain so the function's
                # qualifiers are preserved, e.g. x.y.count(...) -> Dot(x, y).count(...)
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers one level: table -> db, db -> catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, an implicit-concat string run,
        a ``.N`` number, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a primary, a function call, or an identifier/variable, in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC-style ``{fn <function>}`` wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly parenthesis-less) function call into a known Func node,
        a dialect-specific parser result, or an Anonymous function.

        Args:
            functions: name -> builder overrides; defaults to ``self.FUNCTIONS``.
            anonymous: when True, force an exp.Anonymous node even for known names.
            optional_parens: allow no-paren functions like CURRENT_DATE.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and its optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL ``_utf8'abc'``) or fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified with a kind via a dot."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. ``(x, y) -> ...``), or fall back to DISTINCT / an
        ordinary select-or-expression argument with optional ordering/limit."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all -- rewind and parse as a regular argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into a Schema wrapping `this`."""
        index = self._index

        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one field definition within a schema."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_text_seq("ALIAS"):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds when REFRESH does not follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | <expr>}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) -- a computed column, not an identity
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <value>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC, or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an optionally named ([CONSTRAINT <name>]) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint, or fall back to an unnamed one."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one unnamed constraint whose keyword is in `constraints`
        (defaults to ``self.CONSTRAINT_PARSERS``)."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options (ON DELETE/UPDATE actions, DEFERRABLE,
        NOT ENFORCED, etc.) as a list of raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; when `match` is True, REFERENCES must be present."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); rewinds when the snapshot
        keyword does not follow."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse a PRIMARY KEY constraint: a bare column constraint (optionally
        ASC/DESC) or a table-level key with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one element inside brackets/braces: an (optionally aliased) expression
        with optional slice syntax."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse ``[...]`` subscripts/arrays and ``{...}`` struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript indices across dialects with different base offsets
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the ``:`` slice form inside a subscript, e.g. ``x[1:2]``."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # "END" may have been consumed as an interval unit of the ELSE branch;
            # recover it as a column named "interval" in that case
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or the statement form
        IF <cond> THEN <expr> [ELSE <expr>] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-initial bare IF is treated as a command for such dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; rewinds when
        VALUE FOR does not follow."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) or the comma-separated variant."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        Args:
            strict: produce exp.Cast when True, exp.TryCast otherwise.
            safe: forwarded to the resulting node's ``safe`` arg.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # e.g. Snowflake-style CAST(<expr>, '<type string>')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A FORMAT cast to a temporal type is represented as STR_TO_DATE/STR_TO_TIME,
                # with the format translated into the canonical time mapping
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregates into exp.GroupConcat,
        including the ordered-set ``WITHIN GROUP (ORDER BY ...)`` form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
4978 if not self._match_text_seq("WITHIN", "GROUP"): 4979 self._retreat(index) 4980 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4981 4982 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4983 order = self._parse_order(this=seq_get(args, 0)) 4984 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4985 4986 def _parse_convert( 4987 self, strict: bool, safe: t.Optional[bool] = None 4988 ) -> t.Optional[exp.Expression]: 4989 this = self._parse_bitwise() 4990 4991 if self._match(TokenType.USING): 4992 to: t.Optional[exp.Expression] = self.expression( 4993 exp.CharacterSet, this=self._parse_var() 4994 ) 4995 elif self._match(TokenType.COMMA): 4996 to = self._parse_types() 4997 else: 4998 to = None 4999 5000 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5001 5002 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5003 """ 5004 There are generally two variants of the DECODE function: 5005 5006 - DECODE(bin, charset) 5007 - DECODE(expression, search, result [, search, result] ... [, default]) 5008 5009 The second variant will always be parsed into a CASE expression. Note that NULL 5010 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5011 instead of relying on pattern matching. 
5012 """ 5013 args = self._parse_csv(self._parse_conjunction) 5014 5015 if len(args) < 3: 5016 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5017 5018 expression, *expressions = args 5019 if not expression: 5020 return None 5021 5022 ifs = [] 5023 for search, result in zip(expressions[::2], expressions[1::2]): 5024 if not search or not result: 5025 return None 5026 5027 if isinstance(search, exp.Literal): 5028 ifs.append( 5029 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5030 ) 5031 elif isinstance(search, exp.Null): 5032 ifs.append( 5033 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5034 ) 5035 else: 5036 cond = exp.or_( 5037 exp.EQ(this=expression.copy(), expression=search), 5038 exp.and_( 5039 exp.Is(this=expression.copy(), expression=exp.Null()), 5040 exp.Is(this=search.copy(), expression=exp.Null()), 5041 copy=False, 5042 ), 5043 copy=False, 5044 ) 5045 ifs.append(exp.If(this=cond, true=result)) 5046 5047 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5048 5049 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5050 self._match_text_seq("KEY") 5051 key = self._parse_column() 5052 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5053 self._match_text_seq("VALUE") 5054 value = self._parse_bitwise() 5055 5056 if not key and not value: 5057 return None 5058 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5059 5060 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5061 if not this or not self._match_text_seq("FORMAT", "JSON"): 5062 return this 5063 5064 return self.expression(exp.FormatJson, this=this) 5065 5066 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5067 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 5068 for value in values: 5069 if self._match_text_seq(value, "ON", on): 5070 return f"{value} ON {on}" 5071 5072 return None 5073 5074 @t.overload 5075 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5076 5077 @t.overload 5078 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5079 5080 def _parse_json_object(self, agg=False): 5081 star = self._parse_star() 5082 expressions = ( 5083 [star] 5084 if star 5085 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5086 ) 5087 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5088 5089 unique_keys = None 5090 if self._match_text_seq("WITH", "UNIQUE"): 5091 unique_keys = True 5092 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5093 unique_keys = False 5094 5095 self._match_text_seq("KEYS") 5096 5097 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5098 self._parse_type() 5099 ) 5100 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5101 5102 return self.expression( 5103 exp.JSONObjectAgg if agg else exp.JSONObject, 5104 expressions=expressions, 5105 null_handling=null_handling, 5106 unique_keys=unique_keys, 5107 return_type=return_type, 5108 encoding=encoding, 5109 ) 5110 5111 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5112 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5113 if not self._match_text_seq("NESTED"): 5114 this = self._parse_id_var() 5115 kind = self._parse_types(allow_identifiers=False) 5116 nested = None 5117 else: 5118 this = None 5119 kind = None 5120 nested = True 5121 5122 path = self._match_text_seq("PATH") and self._parse_string() 5123 nested_schema = nested and self._parse_json_schema() 5124 5125 return self.expression( 5126 exp.JSONColumnDef, 5127 this=this, 5128 kind=kind, 5129 path=path, 5130 nested_schema=nested_schema, 5131 ) 5132 5133 def _parse_json_schema(self) -> exp.JSONSchema: 
5134 self._match_text_seq("COLUMNS") 5135 return self.expression( 5136 exp.JSONSchema, 5137 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5138 ) 5139 5140 def _parse_json_table(self) -> exp.JSONTable: 5141 this = self._parse_format_json(self._parse_bitwise()) 5142 path = self._match(TokenType.COMMA) and self._parse_string() 5143 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5144 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5145 schema = self._parse_json_schema() 5146 5147 return exp.JSONTable( 5148 this=this, 5149 schema=schema, 5150 path=path, 5151 error_handling=error_handling, 5152 empty_handling=empty_handling, 5153 ) 5154 5155 def _parse_match_against(self) -> exp.MatchAgainst: 5156 expressions = self._parse_csv(self._parse_column) 5157 5158 self._match_text_seq(")", "AGAINST", "(") 5159 5160 this = self._parse_string() 5161 5162 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5163 modifier = "IN NATURAL LANGUAGE MODE" 5164 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5165 modifier = f"{modifier} WITH QUERY EXPANSION" 5166 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5167 modifier = "IN BOOLEAN MODE" 5168 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5169 modifier = "WITH QUERY EXPANSION" 5170 else: 5171 modifier = None 5172 5173 return self.expression( 5174 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5175 ) 5176 5177 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5178 def _parse_open_json(self) -> exp.OpenJSON: 5179 this = self._parse_bitwise() 5180 path = self._match(TokenType.COMMA) and self._parse_string() 5181 5182 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5183 this = self._parse_field(any_token=True) 5184 kind = self._parse_types() 5185 path = self._parse_string() 5186 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5187 5188 
return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        # The WITH (<column defs>) clause follows the closing paren of OPENJSON(...).
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parses POSITION / STRPOS-style calls.

        Args:
            haystack_first: when True, the first CSV argument is the haystack
                rather than the needle.
        """
        args = self._parse_csv(self._parse_bitwise)

        # POSITION(needle IN haystack) form.
        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # PREDICT(MODEL <model>, TABLE <table> [, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # Hint call with a list of table arguments, e.g. BROADCAST(t1, t2).
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return
self.validate_expression(exp.Substring.from_arg_list(args), args) 5248 5249 def _parse_trim(self) -> exp.Trim: 5250 # https://www.w3resource.com/sql/character-functions/trim.php 5251 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5252 5253 position = None 5254 collation = None 5255 expression = None 5256 5257 if self._match_texts(self.TRIM_TYPES): 5258 position = self._prev.text.upper() 5259 5260 this = self._parse_bitwise() 5261 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5262 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5263 expression = self._parse_bitwise() 5264 5265 if invert_order: 5266 this, expression = expression, this 5267 5268 if self._match(TokenType.COLLATE): 5269 collation = self._parse_bitwise() 5270 5271 return self.expression( 5272 exp.Trim, this=this, position=position, expression=expression, collation=collation 5273 ) 5274 5275 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5276 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5277 5278 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5279 return self._parse_window(self._parse_id_var(), alias=True) 5280 5281 def _parse_respect_or_ignore_nulls( 5282 self, this: t.Optional[exp.Expression] 5283 ) -> t.Optional[exp.Expression]: 5284 if self._match_text_seq("IGNORE", "NULLS"): 5285 return self.expression(exp.IgnoreNulls, this=this) 5286 if self._match_text_seq("RESPECT", "NULLS"): 5287 return self.expression(exp.RespectNulls, this=this) 5288 return this 5289 5290 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5291 if self._match(TokenType.HAVING): 5292 self._match_texts(("MAX", "MIN")) 5293 max = self._prev.text.upper() != "MIN" 5294 return self.expression( 5295 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5296 ) 5297 5298 return this 5299 5300 def _parse_window( 5301 self, this: 
t.Optional[exp.Expression], alias: bool = False 5302 ) -> t.Optional[exp.Expression]: 5303 func = this 5304 comments = func.comments if isinstance(func, exp.Expression) else None 5305 5306 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5307 self._match(TokenType.WHERE) 5308 this = self.expression( 5309 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5310 ) 5311 self._match_r_paren() 5312 5313 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5314 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5315 if self._match_text_seq("WITHIN", "GROUP"): 5316 order = self._parse_wrapped(self._parse_order) 5317 this = self.expression(exp.WithinGroup, this=this, expression=order) 5318 5319 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5320 # Some dialects choose to implement and some do not. 5321 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5322 5323 # There is some code above in _parse_lambda that handles 5324 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5325 5326 # The below changes handle 5327 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5328 5329 # Oracle allows both formats 5330 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5331 # and Snowflake chose to do the same for familiarity 5332 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5333 if isinstance(this, exp.AggFunc): 5334 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5335 5336 if ignore_respect and ignore_respect is not this: 5337 ignore_respect.replace(ignore_respect.this) 5338 this = self.expression(ignore_respect.__class__, this=this) 5339 5340 this = self._parse_respect_or_ignore_nulls(this) 5341 5342 # bigquery select from window x AS (partition by ...) 
5343 if alias: 5344 over = None 5345 self._match(TokenType.ALIAS) 5346 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5347 return this 5348 else: 5349 over = self._prev.text.upper() 5350 5351 if comments: 5352 func.comments = None # type: ignore 5353 5354 if not self._match(TokenType.L_PAREN): 5355 return self.expression( 5356 exp.Window, 5357 comments=comments, 5358 this=this, 5359 alias=self._parse_id_var(False), 5360 over=over, 5361 ) 5362 5363 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5364 5365 first = self._match(TokenType.FIRST) 5366 if self._match_text_seq("LAST"): 5367 first = False 5368 5369 partition, order = self._parse_partition_and_order() 5370 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5371 5372 if kind: 5373 self._match(TokenType.BETWEEN) 5374 start = self._parse_window_spec() 5375 self._match(TokenType.AND) 5376 end = self._parse_window_spec() 5377 5378 spec = self.expression( 5379 exp.WindowSpec, 5380 kind=kind, 5381 start=start["value"], 5382 start_side=start["side"], 5383 end=end["value"], 5384 end_side=end["side"], 5385 ) 5386 else: 5387 spec = None 5388 5389 self._match_r_paren() 5390 5391 window = self.expression( 5392 exp.Window, 5393 comments=comments, 5394 this=this, 5395 partition_by=partition, 5396 order=order, 5397 spec=spec, 5398 alias=window_alias, 5399 over=over, 5400 first=first, 5401 ) 5402 5403 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
5404 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5405 return self._parse_window(window, alias=alias) 5406 5407 return window 5408 5409 def _parse_partition_and_order( 5410 self, 5411 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5412 return self._parse_partition_by(), self._parse_order() 5413 5414 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5415 self._match(TokenType.BETWEEN) 5416 5417 return { 5418 "value": ( 5419 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5420 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5421 or self._parse_bitwise() 5422 ), 5423 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5424 } 5425 5426 def _parse_alias( 5427 self, this: t.Optional[exp.Expression], explicit: bool = False 5428 ) -> t.Optional[exp.Expression]: 5429 any_token = self._match(TokenType.ALIAS) 5430 comments = self._prev_comments 5431 5432 if explicit and not any_token: 5433 return this 5434 5435 if self._match(TokenType.L_PAREN): 5436 aliases = self.expression( 5437 exp.Aliases, 5438 comments=comments, 5439 this=this, 5440 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5441 ) 5442 self._match_r_paren(aliases) 5443 return aliases 5444 5445 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5446 self.STRING_ALIASES and self._parse_string_as_identifier() 5447 ) 5448 5449 if alias: 5450 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5451 column = this.this 5452 5453 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5454 if not this.comments and column and column.comments: 5455 this.comments = column.comments 5456 column.comments = None 5457 5458 return this 5459 5460 def _parse_id_var( 5461 self, 5462 any_token: bool = True, 5463 tokens: t.Optional[t.Collection[TokenType]] = None, 5464 ) -> t.Optional[exp.Expression]: 5465 identifier = self._parse_identifier() 5466 5467 if 
identifier:
            return identifier

        # Fall back to consuming any (non-reserved) token, or one of the
        # explicitly permitted identifier tokens.
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        # Dialect-specific string parsers first, then placeholders (e.g. bind params).
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # Converts a string literal into a quoted identifier; None when no string follows.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        # IDENTIFIER tokens are always produced from quoted identifiers.
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parses a Var from a VAR token, any token (if `any_token`), or one of `tokens`."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consumes and returns the current token unless it is reserved; None otherwise.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def
_parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parses `{name}` / `{name: value}` style parameters; the braces are optional.
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The placeholder parser produced nothing: back up so the token
            # can be re-examined by another parser.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * EXCEPT (col, ...) / EXCEPT col — returns the excluded columns.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * REPLACE (expr AS col, ...) — returns the replacement expressions.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parses a `sep`-separated list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments trailing the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold over the binary operators mapped in `expressions`.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Parses `( ... )`; the parentheses may be omitted when `optional` is True."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool =
False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT used inside DDL, e.g. CREATE TABLE ... AS <select>.
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens, e.g. "ISOLATION LEVEL READ COMMITTED".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword was already consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <field def>
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
expression.set("exists", exists_column) 5689 5690 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5691 if self._match_texts(("FIRST", "AFTER")): 5692 position = self._prev.text 5693 column_position = self.expression( 5694 exp.ColumnPosition, this=self._parse_column(), position=position 5695 ) 5696 expression.set("position", column_position) 5697 5698 return expression 5699 5700 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5701 drop = self._match(TokenType.DROP) and self._parse_drop() 5702 if drop and not isinstance(drop, exp.Command): 5703 drop.set("kind", drop.args.get("kind", "COLUMN")) 5704 return drop 5705 5706 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5707 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5708 return self.expression( 5709 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5710 ) 5711 5712 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5713 index = self._index - 1 5714 5715 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5716 return self._parse_csv( 5717 lambda: self.expression( 5718 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5719 ) 5720 ) 5721 5722 self._retreat(index) 5723 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5724 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5725 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5726 5727 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5728 self._match(TokenType.COLUMN) 5729 column = self._parse_field(any_token=True) 5730 5731 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5732 return self.expression(exp.AlterColumn, this=column, drop=True) 5733 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5734 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_conjunction()) 5735 if self._match(TokenType.COMMENT): 5736 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5737 5738 self._match_text_seq("SET", "DATA") 5739 self._match_text_seq("TYPE") 5740 return self.expression( 5741 exp.AlterColumn, 5742 this=column, 5743 dtype=self._parse_types(), 5744 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5745 using=self._match(TokenType.USING) and self._parse_conjunction(), 5746 ) 5747 5748 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5749 index = self._index - 1 5750 5751 partition_exists = self._parse_exists() 5752 if self._match(TokenType.PARTITION, advance=False): 5753 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5754 5755 self._retreat(index) 5756 return self._parse_csv(self._parse_drop_column) 5757 5758 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5759 if self._match(TokenType.COLUMN): 5760 exists = self._parse_exists() 5761 old_column = self._parse_column() 5762 to = self._match_text_seq("TO") 5763 new_column = self._parse_column() 5764 5765 if old_column is None or to is None or new_column is None: 5766 return None 5767 5768 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5769 5770 self._match_text_seq("TO") 5771 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5772 5773 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5774 start = self._prev 5775 5776 if not self._match(TokenType.TABLE): 5777 return self._parse_as_command(start) 5778 5779 exists = self._parse_exists() 5780 only = self._match_text_seq("ONLY") 5781 this = self._parse_table(schema=True) 5782 5783 if self._next: 5784 self._advance() 5785 5786 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5787 if parser: 5788 actions = ensure_list(parser(self)) 5789 options = 
self._parse_csv(self._parse_property) 5790 5791 if not self._curr and actions: 5792 return self.expression( 5793 exp.AlterTable, 5794 this=this, 5795 exists=exists, 5796 actions=actions, 5797 only=only, 5798 options=options, 5799 ) 5800 5801 return self._parse_as_command(start) 5802 5803 def _parse_merge(self) -> exp.Merge: 5804 self._match(TokenType.INTO) 5805 target = self._parse_table() 5806 5807 if target and self._match(TokenType.ALIAS, advance=False): 5808 target.set("alias", self._parse_table_alias()) 5809 5810 self._match(TokenType.USING) 5811 using = self._parse_table() 5812 5813 self._match(TokenType.ON) 5814 on = self._parse_conjunction() 5815 5816 return self.expression( 5817 exp.Merge, 5818 this=target, 5819 using=using, 5820 on=on, 5821 expressions=self._parse_when_matched(), 5822 ) 5823 5824 def _parse_when_matched(self) -> t.List[exp.When]: 5825 whens = [] 5826 5827 while self._match(TokenType.WHEN): 5828 matched = not self._match(TokenType.NOT) 5829 self._match_text_seq("MATCHED") 5830 source = ( 5831 False 5832 if self._match_text_seq("BY", "TARGET") 5833 else self._match_text_seq("BY", "SOURCE") 5834 ) 5835 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5836 5837 self._match(TokenType.THEN) 5838 5839 if self._match(TokenType.INSERT): 5840 _this = self._parse_star() 5841 if _this: 5842 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5843 else: 5844 then = self.expression( 5845 exp.Insert, 5846 this=self._parse_value(), 5847 expression=self._match_text_seq("VALUES") and self._parse_value(), 5848 ) 5849 elif self._match(TokenType.UPDATE): 5850 expressions = self._parse_star() 5851 if expressions: 5852 then = self.expression(exp.Update, expressions=expressions) 5853 else: 5854 then = self.expression( 5855 exp.Update, 5856 expressions=self._match(TokenType.SET) 5857 and self._parse_csv(self._parse_equality), 5858 ) 5859 elif self._match(TokenType.DELETE): 5860 then = self.expression(exp.Var, 
this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        # Unknown SHOW variant: keep it as an opaque Command.
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parses a single `<name> = <value>` / `<name> TO <value>` SET item."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment: rewind so the tokens can be re-parsed elsewhere.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set,
expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5921 ) 5922 5923 if self._curr: 5924 self._retreat(index) 5925 return self._parse_as_command(self._prev) 5926 5927 return set_ 5928 5929 def _parse_var_from_options( 5930 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5931 ) -> t.Optional[exp.Var]: 5932 start = self._curr 5933 if not start: 5934 return None 5935 5936 option = start.text.upper() 5937 continuations = options.get(option) 5938 5939 index = self._index 5940 self._advance() 5941 for keywords in continuations or []: 5942 if isinstance(keywords, str): 5943 keywords = (keywords,) 5944 5945 if self._match_text_seq(*keywords): 5946 option = f"{option} {' '.join(keywords)}" 5947 break 5948 else: 5949 if continuations or continuations is None: 5950 if raise_unmatched: 5951 self.raise_error(f"Unknown option {option}") 5952 5953 self._retreat(index) 5954 return None 5955 5956 return exp.var(option) 5957 5958 def _parse_as_command(self, start: Token) -> exp.Command: 5959 while self._curr: 5960 self._advance() 5961 text = self._find_sql(start, self._prev) 5962 size = len(start.text) 5963 self._warn_unsupported() 5964 return exp.Command(this=text[:size], expression=text[size:]) 5965 5966 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5967 settings = [] 5968 5969 self._match_l_paren() 5970 kind = self._parse_id_var() 5971 5972 if self._match(TokenType.L_PAREN): 5973 while True: 5974 key = self._parse_id_var() 5975 value = self._parse_primary() 5976 5977 if not key and value is None: 5978 break 5979 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5980 self._match(TokenType.R_PAREN) 5981 5982 self._match_r_paren() 5983 5984 return self.expression( 5985 exp.DictProperty, 5986 this=this, 5987 kind=kind.this if kind else None, 5988 settings=settings, 5989 ) 5990 5991 def _parse_dict_range(self, this: str) -> exp.DictRange: 5992 self._match_l_paren() 5993 has_min = self._match_text_seq("MIN") 5994 
if has_min: 5995 min = self._parse_var() or self._parse_primary() 5996 self._match_text_seq("MAX") 5997 max = self._parse_var() or self._parse_primary() 5998 else: 5999 max = self._parse_var() or self._parse_primary() 6000 min = exp.Literal.number(0) 6001 self._match_r_paren() 6002 return self.expression(exp.DictRange, this=this, min=min, max=max) 6003 6004 def _parse_comprehension( 6005 self, this: t.Optional[exp.Expression] 6006 ) -> t.Optional[exp.Comprehension]: 6007 index = self._index 6008 expression = self._parse_column() 6009 if not self._match(TokenType.IN): 6010 self._retreat(index - 1) 6011 return None 6012 iterator = self._parse_column() 6013 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6014 return self.expression( 6015 exp.Comprehension, 6016 this=this, 6017 expression=expression, 6018 iterator=iterator, 6019 condition=condition, 6020 ) 6021 6022 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6023 if self._match(TokenType.HEREDOC_STRING): 6024 return self.expression(exp.Heredoc, this=self._prev.text) 6025 6026 if not self._match_text_seq("$"): 6027 return None 6028 6029 tags = ["$"] 6030 tag_text = None 6031 6032 if self._is_connected(): 6033 self._advance() 6034 tags.append(self._prev.text.upper()) 6035 else: 6036 self.raise_error("No closing $ found") 6037 6038 if tags[-1] != "$": 6039 if self._is_connected() and self._match_text_seq("$"): 6040 tag_text = tags[-1] 6041 tags.append("$") 6042 else: 6043 self.raise_error("No closing $ found") 6044 6045 heredoc_start = self._curr 6046 6047 while self._curr: 6048 if self._match_text_seq(*tags, advance=False): 6049 this = self._find_sql(heredoc_start, self._prev) 6050 self._advance(len(tags)) 6051 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6052 6053 self._advance() 6054 6055 self.raise_error(f"No closing {''.join(tags)} found") 6056 return None 6057 6058 def _find_parser( 6059 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6060 ) -> 
t.Optional[t.Callable]: 6061 if not self._curr: 6062 return None 6063 6064 index = self._index 6065 this = [] 6066 while True: 6067 # The current token might be multiple words 6068 curr = self._curr.text.upper() 6069 key = curr.split(" ") 6070 this.append(curr) 6071 6072 self._advance() 6073 result, trie = in_trie(trie, key) 6074 if result == TrieResult.FAILED: 6075 break 6076 6077 if result == TrieResult.EXISTS: 6078 subparser = parsers[" ".join(this)] 6079 return subparser 6080 6081 self._retreat(index) 6082 return None 6083 6084 def _match(self, token_type, advance=True, expression=None): 6085 if not self._curr: 6086 return None 6087 6088 if self._curr.token_type == token_type: 6089 if advance: 6090 self._advance() 6091 self._add_comments(expression) 6092 return True 6093 6094 return None 6095 6096 def _match_set(self, types, advance=True): 6097 if not self._curr: 6098 return None 6099 6100 if self._curr.token_type in types: 6101 if advance: 6102 self._advance() 6103 return True 6104 6105 return None 6106 6107 def _match_pair(self, token_type_a, token_type_b, advance=True): 6108 if not self._curr or not self._next: 6109 return None 6110 6111 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6112 if advance: 6113 self._advance(2) 6114 return True 6115 6116 return None 6117 6118 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6119 if not self._match(TokenType.L_PAREN, expression=expression): 6120 self.raise_error("Expecting (") 6121 6122 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6123 if not self._match(TokenType.R_PAREN, expression=expression): 6124 self.raise_error("Expecting )") 6125 6126 def _match_texts(self, texts, advance=True): 6127 if self._curr and self._curr.text.upper() in texts: 6128 if advance: 6129 self._advance() 6130 return True 6131 return None 6132 6133 def _match_text_seq(self, *texts, advance=True): 6134 index = self._index 6135 for text in 
texts: 6136 if self._curr and self._curr.text.upper() == text: 6137 self._advance() 6138 else: 6139 self._retreat(index) 6140 return None 6141 6142 if not advance: 6143 self._retreat(index) 6144 6145 return True 6146 6147 def _replace_lambda( 6148 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6149 ) -> t.Optional[exp.Expression]: 6150 if not node: 6151 return node 6152 6153 for column in node.find_all(exp.Column): 6154 if column.parts[0].name in lambda_variables: 6155 dot_or_id = column.to_dot() if column.table else column.this 6156 parent = column.parent 6157 6158 while isinstance(parent, exp.Dot): 6159 if not isinstance(parent.parent, exp.Dot): 6160 parent.replace(dot_or_id) 6161 break 6162 parent = parent.parent 6163 else: 6164 if column is node: 6165 node = dot_or_id 6166 else: 6167 column.replace(dot_or_id) 6168 return node 6169 6170 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6171 start = self._prev 6172 6173 # Not to be confused with TRUNCATE(number, decimals) function call 6174 if self._match(TokenType.L_PAREN): 6175 self._retreat(self._index - 2) 6176 return self._parse_function() 6177 6178 # Clickhouse supports TRUNCATE DATABASE as well 6179 is_database = self._match(TokenType.DATABASE) 6180 6181 self._match(TokenType.TABLE) 6182 6183 exists = self._parse_exists(not_=False) 6184 6185 expressions = self._parse_csv( 6186 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6187 ) 6188 6189 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6190 6191 if self._match_text_seq("RESTART", "IDENTITY"): 6192 identity = "RESTART" 6193 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6194 identity = "CONTINUE" 6195 else: 6196 identity = None 6197 6198 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6199 option = self._prev.text 6200 else: 6201 option = None 6202 6203 partition = self._parse_partition() 6204 6205 # Fallback case 6206 if 
self._curr: 6207 return self._parse_as_command(start) 6208 6209 return self.expression( 6210 exp.TruncateTable, 6211 expressions=expressions, 6212 is_database=is_database, 6213 exists=exists, 6214 cluster=cluster, 6215 identity=identity, 6216 option=option, 6217 partition=partition, 6218 ) 6219 6220 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6221 this = self._parse_ordered(self._parse_opclass) 6222 6223 if not self._match(TokenType.WITH): 6224 return this 6225 6226 op = self._parse_var(any_token=True) 6227 6228 return self.expression(exp.WithOperator, this=this, op=op)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, otherwise a VarMap from the
    alternating key/value argument list."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Arguments alternate key, value, key, value, ...
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]
    keys = [key for key, _ in pairs]
    values = [value for _, value in pairs]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) node, honoring the dialect's argument order and its
    single-argument default."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # Single-argument form: some dialects treat LOG(x) as LN(x)
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)

    # Two-argument form. Default order is (base, value); swap when the
    # dialect puts the value before the base.
    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this

    return exp.Log(this=this, expression=expression)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs *expr_type* from (value, path, ...)
    arguments, converting the path through the dialect's JSON path parser."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        # Extra path arguments are only meaningful for JSONExtract
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 
0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 
TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 
} 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IS, 349 TokenType.ISNULL, 350 TokenType.INTERVAL, 351 TokenType.KEEP, 352 TokenType.KILL, 353 TokenType.LEFT, 354 TokenType.LOAD, 355 TokenType.MERGE, 356 TokenType.NATURAL, 357 TokenType.NEXT, 358 TokenType.OFFSET, 359 TokenType.OPERATOR, 360 TokenType.ORDINALITY, 361 TokenType.OVERLAPS, 362 TokenType.OVERWRITE, 363 TokenType.PARTITION, 364 TokenType.PERCENT, 365 TokenType.PIVOT, 366 TokenType.PRAGMA, 367 TokenType.RANGE, 368 TokenType.RECURSIVE, 369 TokenType.REFERENCES, 370 TokenType.REFRESH, 371 TokenType.REPLACE, 372 TokenType.RIGHT, 373 TokenType.ROW, 374 TokenType.ROWS, 375 TokenType.SEMI, 376 TokenType.SET, 377 TokenType.SETTINGS, 378 TokenType.SHOW, 379 TokenType.TEMPORARY, 380 TokenType.TOP, 381 TokenType.TRUE, 382 TokenType.TRUNCATE, 383 TokenType.UNIQUE, 384 TokenType.UNPIVOT, 385 TokenType.UPDATE, 386 TokenType.USE, 387 TokenType.VOLATILE, 388 TokenType.WINDOW, 389 *CREATABLES, 390 *SUBQUERY_PREDICATES, 391 *TYPE_TOKENS, 392 *NO_PAREN_FUNCTIONS, 393 } 394 395 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 396 397 TABLE_ALIAS_TOKENS = 
ID_VAR_TOKENS - { 398 TokenType.ANTI, 399 TokenType.APPLY, 400 TokenType.ASOF, 401 TokenType.FULL, 402 TokenType.LEFT, 403 TokenType.LOCK, 404 TokenType.NATURAL, 405 TokenType.OFFSET, 406 TokenType.RIGHT, 407 TokenType.SEMI, 408 TokenType.WINDOW, 409 } 410 411 ALIAS_TOKENS = ID_VAR_TOKENS 412 413 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 414 415 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 416 417 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 418 419 FUNC_TOKENS = { 420 TokenType.COLLATE, 421 TokenType.COMMAND, 422 TokenType.CURRENT_DATE, 423 TokenType.CURRENT_DATETIME, 424 TokenType.CURRENT_TIMESTAMP, 425 TokenType.CURRENT_TIME, 426 TokenType.CURRENT_USER, 427 TokenType.FILTER, 428 TokenType.FIRST, 429 TokenType.FORMAT, 430 TokenType.GLOB, 431 TokenType.IDENTIFIER, 432 TokenType.INDEX, 433 TokenType.ISNULL, 434 TokenType.ILIKE, 435 TokenType.INSERT, 436 TokenType.LIKE, 437 TokenType.MERGE, 438 TokenType.OFFSET, 439 TokenType.PRIMARY_KEY, 440 TokenType.RANGE, 441 TokenType.REPLACE, 442 TokenType.RLIKE, 443 TokenType.ROW, 444 TokenType.UNNEST, 445 TokenType.VAR, 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.SEQUENCE, 449 TokenType.DATE, 450 TokenType.DATETIME, 451 TokenType.TABLE, 452 TokenType.TIMESTAMP, 453 TokenType.TIMESTAMPTZ, 454 TokenType.TRUNCATE, 455 TokenType.WINDOW, 456 TokenType.XOR, 457 *TYPE_TOKENS, 458 *SUBQUERY_PREDICATES, 459 } 460 461 CONJUNCTION = { 462 TokenType.AND: exp.And, 463 TokenType.OR: exp.Or, 464 } 465 466 EQUALITY = { 467 TokenType.COLON_EQ: exp.PropertyEQ, 468 TokenType.EQ: exp.EQ, 469 TokenType.NEQ: exp.NEQ, 470 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 471 } 472 473 COMPARISON = { 474 TokenType.GT: exp.GT, 475 TokenType.GTE: exp.GTE, 476 TokenType.LT: exp.LT, 477 TokenType.LTE: exp.LTE, 478 } 479 480 BITWISE = { 481 TokenType.AMP: exp.BitwiseAnd, 482 TokenType.CARET: exp.BitwiseXor, 483 TokenType.PIPE: exp.BitwiseOr, 484 } 485 486 TERM = { 487 TokenType.DASH: exp.Sub, 488 TokenType.PLUS: 
exp.Add, 489 TokenType.MOD: exp.Mod, 490 TokenType.COLLATE: exp.Collate, 491 } 492 493 FACTOR = { 494 TokenType.DIV: exp.IntDiv, 495 TokenType.LR_ARROW: exp.Distance, 496 TokenType.SLASH: exp.Div, 497 TokenType.STAR: exp.Mul, 498 } 499 500 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 501 502 TIMES = { 503 TokenType.TIME, 504 TokenType.TIMETZ, 505 } 506 507 TIMESTAMPS = { 508 TokenType.TIMESTAMP, 509 TokenType.TIMESTAMPTZ, 510 TokenType.TIMESTAMPLTZ, 511 *TIMES, 512 } 513 514 SET_OPERATIONS = { 515 TokenType.UNION, 516 TokenType.INTERSECT, 517 TokenType.EXCEPT, 518 } 519 520 JOIN_METHODS = { 521 TokenType.ASOF, 522 TokenType.NATURAL, 523 TokenType.POSITIONAL, 524 } 525 526 JOIN_SIDES = { 527 TokenType.LEFT, 528 TokenType.RIGHT, 529 TokenType.FULL, 530 } 531 532 JOIN_KINDS = { 533 TokenType.INNER, 534 TokenType.OUTER, 535 TokenType.CROSS, 536 TokenType.SEMI, 537 TokenType.ANTI, 538 } 539 540 JOIN_HINTS: t.Set[str] = set() 541 542 LAMBDAS = { 543 TokenType.ARROW: lambda self, expressions: self.expression( 544 exp.Lambda, 545 this=self._replace_lambda( 546 self._parse_conjunction(), 547 {node.name for node in expressions}, 548 ), 549 expressions=expressions, 550 ), 551 TokenType.FARROW: lambda self, expressions: self.expression( 552 exp.Kwarg, 553 this=exp.var(expressions[0].name), 554 expression=self._parse_conjunction(), 555 ), 556 } 557 558 COLUMN_OPERATORS = { 559 TokenType.DOT: None, 560 TokenType.DCOLON: lambda self, this, to: self.expression( 561 exp.Cast if self.STRICT_CAST else exp.TryCast, 562 this=this, 563 to=to, 564 ), 565 TokenType.ARROW: lambda self, this, path: self.expression( 566 exp.JSONExtract, 567 this=this, 568 expression=self.dialect.to_json_path(path), 569 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 570 ), 571 TokenType.DARROW: lambda self, this, path: self.expression( 572 exp.JSONExtractScalar, 573 this=this, 574 expression=self.dialect.to_json_path(path), 575 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 576 ), 577 
TokenType.HASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtract, 579 this=this, 580 expression=path, 581 ), 582 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 583 exp.JSONBExtractScalar, 584 this=this, 585 expression=path, 586 ), 587 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 588 exp.JSONBContains, 589 this=this, 590 expression=key, 591 ), 592 } 593 594 EXPRESSION_PARSERS = { 595 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 596 exp.Column: lambda self: self._parse_column(), 597 exp.Condition: lambda self: self._parse_conjunction(), 598 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 599 exp.Expression: lambda self: self._parse_expression(), 600 exp.From: lambda self: self._parse_from(), 601 exp.Group: lambda self: self._parse_group(), 602 exp.Having: lambda self: self._parse_having(), 603 exp.Identifier: lambda self: self._parse_id_var(), 604 exp.Join: lambda self: self._parse_join(), 605 exp.Lambda: lambda self: self._parse_lambda(), 606 exp.Lateral: lambda self: self._parse_lateral(), 607 exp.Limit: lambda self: self._parse_limit(), 608 exp.Offset: lambda self: self._parse_offset(), 609 exp.Order: lambda self: self._parse_order(), 610 exp.Ordered: lambda self: self._parse_ordered(), 611 exp.Properties: lambda self: self._parse_properties(), 612 exp.Qualify: lambda self: self._parse_qualify(), 613 exp.Returning: lambda self: self._parse_returning(), 614 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 615 exp.Table: lambda self: self._parse_table_parts(), 616 exp.TableAlias: lambda self: self._parse_table_alias(), 617 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 618 exp.Where: lambda self: self._parse_where(), 619 exp.Window: lambda self: self._parse_named_window(), 620 exp.With: lambda self: self._parse_with(), 621 "JOIN_TYPE": lambda self: self._parse_join_parts(), 622 } 623 624 STATEMENT_PARSERS = { 625 
TokenType.ALTER: lambda self: self._parse_alter(), 626 TokenType.BEGIN: lambda self: self._parse_transaction(), 627 TokenType.CACHE: lambda self: self._parse_cache(), 628 TokenType.COMMENT: lambda self: self._parse_comment(), 629 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 630 TokenType.CREATE: lambda self: self._parse_create(), 631 TokenType.DELETE: lambda self: self._parse_delete(), 632 TokenType.DESC: lambda self: self._parse_describe(), 633 TokenType.DESCRIBE: lambda self: self._parse_describe(), 634 TokenType.DROP: lambda self: self._parse_drop(), 635 TokenType.INSERT: lambda self: self._parse_insert(), 636 TokenType.KILL: lambda self: self._parse_kill(), 637 TokenType.LOAD: lambda self: self._parse_load(), 638 TokenType.MERGE: lambda self: self._parse_merge(), 639 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 640 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 641 TokenType.REFRESH: lambda self: self._parse_refresh(), 642 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 643 TokenType.SET: lambda self: self._parse_set(), 644 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 645 TokenType.UNCACHE: lambda self: self._parse_uncache(), 646 TokenType.UPDATE: lambda self: self._parse_update(), 647 TokenType.USE: lambda self: self.expression( 648 exp.Use, 649 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 650 this=self._parse_table(schema=False), 651 ), 652 } 653 654 UNARY_PARSERS = { 655 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 656 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 657 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 658 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 659 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 660 
TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 661 } 662 663 STRING_PARSERS = { 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.STRING: lambda self, token: self.expression( 672 exp.Literal, this=token.text, is_string=True 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 } 680 681 NUMERIC_PARSERS = { 682 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 683 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 684 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 685 TokenType.NUMBER: lambda self, token: self.expression( 686 exp.Literal, this=token.text, is_string=False 687 ), 688 } 689 690 PRIMARY_PARSERS = { 691 **STRING_PARSERS, 692 **NUMERIC_PARSERS, 693 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 694 TokenType.NULL: lambda self, _: self.expression(exp.Null), 695 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 696 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 697 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 698 TokenType.STAR: lambda self, _: self.expression( 699 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 700 ), 701 } 702 703 PLACEHOLDER_PARSERS = { 704 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 705 TokenType.PARAMETER: lambda self: self._parse_parameter(), 706 TokenType.COLON: lambda self: ( 707 
self.expression(exp.Placeholder, this=self._prev.text) 708 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 709 else None 710 ), 711 } 712 713 RANGE_PARSERS = { 714 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 715 TokenType.GLOB: binary_range_parser(exp.Glob), 716 TokenType.ILIKE: binary_range_parser(exp.ILike), 717 TokenType.IN: lambda self, this: self._parse_in(this), 718 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 719 TokenType.IS: lambda self, this: self._parse_is(this), 720 TokenType.LIKE: binary_range_parser(exp.Like), 721 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 722 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 723 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 724 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 725 } 726 727 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 728 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 729 "AUTO": lambda self: self._parse_auto_property(), 730 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 731 "BACKUP": lambda self: self.expression( 732 exp.BackupProperty, this=self._parse_var(any_token=True) 733 ), 734 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 735 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHECKSUM": lambda self: self._parse_checksum(), 738 "CLUSTER BY": lambda self: self._parse_cluster(), 739 "CLUSTERED": lambda self: self._parse_clustered_by(), 740 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 741 exp.CollateProperty, **kwargs 742 ), 743 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 744 "CONTAINS": lambda self: self._parse_contains_property(), 745 "COPY": lambda self: self._parse_copy_property(), 746 "DATABLOCKSIZE": lambda self, **kwargs: 
        # --- tail of PROPERTY_PARSERS (dict opened earlier in the class body) ---
        # Maps a property keyword seen in CREATE/ALTER statements to a callable
        # that parses the corresponding property expression.
        self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Maps a column-constraint keyword to a callable that parses that constraint.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> when followed by UPDATE, otherwise a bare ON property.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Maps the keyword following ALTER TABLE <name> to a parser for that action.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint keywords that may appear in a schema definition without a
    # preceding CONSTRAINT <name>.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Functions that are parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Expression classes that represent a key/value-style definition.
    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions whose argument lists need custom (non-CSV) parsing.
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Maps a query-modifier token to a callable producing ("modifier key", parsed node).
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda
self: ("where", self._parse_where()), 948 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 949 TokenType.HAVING: lambda self: ("having", self._parse_having()), 950 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 951 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 952 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 953 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 954 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 955 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 956 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 957 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 958 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 959 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 960 TokenType.CLUSTER_BY: lambda self: ( 961 "cluster", 962 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 963 ), 964 TokenType.DISTRIBUTE_BY: lambda self: ( 965 "distribute", 966 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 967 ), 968 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 969 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 970 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 971 } 972 973 SET_PARSERS = { 974 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 975 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 976 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 977 "TRANSACTION": lambda self: self._parse_set_transaction(), 978 } 979 980 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 981 982 TYPE_LITERAL_PARSERS = { 983 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 984 } 985 986 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, 
TokenType.L_PAREN} 987 988 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 989 990 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 991 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 992 "ISOLATION": ( 993 ("LEVEL", "REPEATABLE", "READ"), 994 ("LEVEL", "READ", "COMMITTED"), 995 ("LEVEL", "READ", "UNCOMITTED"), 996 ("LEVEL", "SERIALIZABLE"), 997 ), 998 "READ": ("WRITE", "ONLY"), 999 } 1000 1001 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1002 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1003 ) 1004 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1005 1006 CREATE_SEQUENCE: OPTIONS_TYPE = { 1007 "SCALE": ("EXTEND", "NOEXTEND"), 1008 "SHARD": ("EXTEND", "NOEXTEND"), 1009 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1010 **dict.fromkeys( 1011 ( 1012 "SESSION", 1013 "GLOBAL", 1014 "KEEP", 1015 "NOKEEP", 1016 "ORDER", 1017 "NOORDER", 1018 "NOCACHE", 1019 "CYCLE", 1020 "NOCYCLE", 1021 "NOMINVALUE", 1022 "NOMAXVALUE", 1023 "NOSCALE", 1024 "NOSHARD", 1025 ), 1026 tuple(), 1027 ), 1028 } 1029 1030 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1031 1032 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1033 1034 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1035 1036 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1037 1038 CLONE_KEYWORDS = {"CLONE", "COPY"} 1039 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1040 1041 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1042 1043 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1044 1045 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1046 1047 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1048 1049 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1050 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1051 WINDOW_SIDES = {"FOLLOWING", 
        "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    # Passed as the strict flag when parsing CAST/CONVERT (see FUNCTION_PARSERS).
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG(x) defaults to natural log (see build_logarithm).
    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: The number of characters of context to include in error messages.
            max_errors: Maximum number of error messages concatenated into a raised ParseError.
            dialect: The dialect (name or instance) to parse for; resolved via Dialect.get_or_raise.
        """
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable parser state so the instance can be reused on new input."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1166 """ 1167 return self._parse( 1168 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1169 ) 1170 1171 def parse_into( 1172 self, 1173 expression_types: exp.IntoType, 1174 raw_tokens: t.List[Token], 1175 sql: t.Optional[str] = None, 1176 ) -> t.List[t.Optional[exp.Expression]]: 1177 """ 1178 Parses a list of tokens into a given Expression type. If a collection of Expression 1179 types is given instead, this method will try to parse the token list into each one 1180 of them, stopping at the first for which the parsing succeeds. 1181 1182 Args: 1183 expression_types: The expression type(s) to try and parse the token list into. 1184 raw_tokens: The list of tokens. 1185 sql: The original SQL string, used to produce helpful debug messages. 1186 1187 Returns: 1188 The target Expression. 1189 """ 1190 errors = [] 1191 for expression_type in ensure_list(expression_types): 1192 parser = self.EXPRESSION_PARSERS.get(expression_type) 1193 if not parser: 1194 raise TypeError(f"No parser registered for {expression_type}") 1195 1196 try: 1197 return self._parse(parser, raw_tokens, sql) 1198 except ParseError as e: 1199 e.errors[0]["into_expression"] = expression_type 1200 errors.append(e) 1201 1202 raise ParseError( 1203 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1204 errors=merge_errors(errors), 1205 ) from errors[-1] 1206 1207 def _parse( 1208 self, 1209 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1210 raw_tokens: t.List[Token], 1211 sql: t.Optional[str] = None, 1212 ) -> t.List[t.Optional[exp.Expression]]: 1213 self.reset() 1214 self.sql = sql or "" 1215 1216 total = len(raw_tokens) 1217 chunks: t.List[t.List[Token]] = [[]] 1218 1219 for i, token in enumerate(raw_tokens): 1220 if token.token_type == TokenType.SEMICOLON: 1221 if i < total - 1: 1222 chunks.append([]) 1223 else: 1224 chunks[-1].append(token) 1225 1226 expressions = [] 1227 1228 for tokens in chunks: 1229 self._index = -1 1230 self._tokens = 
tokens 1231 self._advance() 1232 1233 expressions.append(parse_method(self)) 1234 1235 if self._index < len(self._tokens): 1236 self.raise_error("Invalid expression / Unexpected token") 1237 1238 self.check_errors() 1239 1240 return expressions 1241 1242 def check_errors(self) -> None: 1243 """Logs or raises any found errors, depending on the chosen error level setting.""" 1244 if self.error_level == ErrorLevel.WARN: 1245 for error in self.errors: 1246 logger.error(str(error)) 1247 elif self.error_level == ErrorLevel.RAISE and self.errors: 1248 raise ParseError( 1249 concat_messages(self.errors, self.max_errors), 1250 errors=merge_errors(self.errors), 1251 ) 1252 1253 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1254 """ 1255 Appends an error in the list of recorded errors or raises it, depending on the chosen 1256 error level setting. 1257 """ 1258 token = token or self._curr or self._prev or Token.string("") 1259 start = token.start 1260 end = token.end + 1 1261 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1262 highlight = self.sql[start:end] 1263 end_context = self.sql[end : end + self.error_message_context] 1264 1265 error = ParseError.new( 1266 f"{message}. Line {token.line}, Col: {token.col}.\n" 1267 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1268 description=message, 1269 line=token.line, 1270 col=token.col, 1271 start_context=start_context, 1272 highlight=highlight, 1273 end_context=end_context, 1274 ) 1275 1276 if self.error_level == ErrorLevel.IMMEDIATE: 1277 raise error 1278 1279 self.errors.append(error) 1280 1281 def expression( 1282 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1283 ) -> E: 1284 """ 1285 Creates a new, validated Expression. 1286 1287 Args: 1288 exp_class: The expression class to instantiate. 1289 comments: An optional list of comments to attach to the expression. 
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicitly-passed comments, otherwise fall back to comments
        # buffered from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Moves buffered token comments (if any) onto `expression` and clears the buffer."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanning the given tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        """Whether the previous and current tokens are adjacent in the source text."""
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor `times` steps forward, refreshing _curr/_next/_prev."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the token cursor to the absolute position `index` (backtracking)."""
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        """Logs a warning that the current chunk is falling back to Command parsing."""
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Parses the rest of the statement as an opaque Command expression."""
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses the target table of a TO clause into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-expression actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression may be followed by an action (DELETE / RECOMPRESS /
            # TO DISK / TO VOLUME); otherwise the bare expression is returned.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: dispatches on the leading token, then falls back to
        command parsing, and finally to a bare expression or SELECT."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; unknown kinds fall back to a generic Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; truthy only when the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement; unsupported forms fall back to a Command."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr:
            # Leftover tokens mean the statement wasn't fully understood.
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options; returns None when nothing was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property preceded by qualifying keywords (NO/DUAL/BEFORE/...)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect qualifier flags that precede the property keyword itself.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX",
"MAXIMUM")), 1698 } 1699 1700 if self._match_texts(self.PROPERTY_PARSERS): 1701 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1702 try: 1703 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1704 except TypeError: 1705 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1706 1707 return None 1708 1709 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1710 return self._parse_wrapped_csv(self._parse_property) 1711 1712 def _parse_property(self) -> t.Optional[exp.Expression]: 1713 if self._match_texts(self.PROPERTY_PARSERS): 1714 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1715 1716 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1717 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1718 1719 if self._match_text_seq("COMPOUND", "SORTKEY"): 1720 return self._parse_sortkey(compound=True) 1721 1722 if self._match_text_seq("SQL", "SECURITY"): 1723 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1724 1725 index = self._index 1726 key = self._parse_column() 1727 1728 if not self._match(TokenType.EQ): 1729 self._retreat(index) 1730 return self._parse_sequence_properties() 1731 1732 return self.expression( 1733 exp.Property, 1734 this=key.to_dot() if isinstance(key, exp.Column) else key, 1735 value=self._parse_bitwise() or self._parse_var(any_token=True), 1736 ) 1737 1738 def _parse_stored(self) -> exp.FileFormatProperty: 1739 self._match(TokenType.ALIAS) 1740 1741 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1742 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1743 1744 return self.expression( 1745 exp.FileFormatProperty, 1746 this=( 1747 self.expression( 1748 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1749 ) 1750 if input_format or output_format 1751 else self._parse_var_or_string() or 
    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `[=|AS] <field>` and wrap it in the given expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into a Properties node, or None if there
        are none. `before` selects the pre-body (Teradata-style) grammar."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse [NO] FALLBACK [PROTECTION]; NO is matched by the caller."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property when preceded by one of
        PRE_VOLATILE_TOKENS, otherwise a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parse SYSTEM_VERSIONING = ON, optionally with
        (HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG; NO is matched by the caller."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JournalProperty from modifiers matched by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; rewinds the already-consumed COPY token when
        GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE [=] n [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO: either `= n [PERCENT]`, or the NO/DEFAULT
        modifiers matched by the caller."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1935 units = self._prev.text 1936 1937 return self.expression( 1938 exp.DataBlocksizeProperty, 1939 size=size, 1940 units=units, 1941 default=default, 1942 minimum=minimum, 1943 maximum=maximum, 1944 ) 1945 1946 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1947 self._match(TokenType.EQ) 1948 always = self._match_text_seq("ALWAYS") 1949 manual = self._match_text_seq("MANUAL") 1950 never = self._match_text_seq("NEVER") 1951 default = self._match_text_seq("DEFAULT") 1952 1953 autotemp = None 1954 if self._match_text_seq("AUTOTEMP"): 1955 autotemp = self._parse_schema() 1956 1957 return self.expression( 1958 exp.BlockCompressionProperty, 1959 always=always, 1960 manual=manual, 1961 never=never, 1962 default=default, 1963 autotemp=autotemp, 1964 ) 1965 1966 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1967 index = self._index 1968 no = self._match_text_seq("NO") 1969 concurrent = self._match_text_seq("CONCURRENT") 1970 1971 if not self._match_text_seq("ISOLATED", "LOADING"): 1972 self._retreat(index) 1973 return None 1974 1975 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1976 return self.expression( 1977 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1978 ) 1979 1980 def _parse_locking(self) -> exp.LockingProperty: 1981 if self._match(TokenType.TABLE): 1982 kind = "TABLE" 1983 elif self._match(TokenType.VIEW): 1984 kind = "VIEW" 1985 elif self._match(TokenType.ROW): 1986 kind = "ROW" 1987 elif self._match_text_seq("DATABASE"): 1988 kind = "DATABASE" 1989 else: 1990 kind = None 1991 1992 if kind in ("DATABASE", "TABLE", "VIEW"): 1993 this = self._parse_table_parts() 1994 else: 1995 this = None 1996 1997 if self._match(TokenType.FOR): 1998 for_or_in = "FOR" 1999 elif self._match(TokenType.IN): 2000 for_or_in = "IN" 2001 else: 2002 for_or_in = None 2003 2004 if self._match_text_seq("ACCESS"): 
    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: IN (...), FROM (...) TO (...), or
        WITH (MODULUS n, REMAINDER m). Raises if none of these match."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords inside bounds, not column refs
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
self._match_text_seq("SQL"): 2118 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2119 return None 2120 2121 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2122 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2123 return exp.OnCommitProperty() 2124 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2125 return exp.OnCommitProperty(delete=True) 2126 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2127 2128 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2129 if self._match_text_seq("SQL", "DATA"): 2130 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2131 return None 2132 2133 def _parse_distkey(self) -> exp.DistKeyProperty: 2134 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2135 2136 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2137 table = self._parse_table(schema=True) 2138 2139 options = [] 2140 while self._match_texts(("INCLUDING", "EXCLUDING")): 2141 this = self._prev.text.upper() 2142 2143 id_var = self._parse_id_var() 2144 if not id_var: 2145 return None 2146 2147 options.append( 2148 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2149 ) 2150 2151 return self.expression(exp.LikeProperty, this=table, expressions=options) 2152 2153 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2154 return self.expression( 2155 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2156 ) 2157 2158 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2159 self._match(TokenType.EQ) 2160 return self.expression( 2161 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2162 ) 2163 2164 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2165 self._match_text_seq("WITH", "CONNECTION") 2166 return self.expression( 2167 
    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a scalar type, TABLE [<schema>], or the
        generic TABLE<col type, ...> form."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> — angle-bracketed struct-like columns
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<creatable kind>] [EXTENDED | FORMATTED] <table>
        [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED")) and self._prev.text.upper()
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse upsert clauses: ON CONFLICT [...] <action> or
        ON DUPLICATE KEY <action>."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # ON CONFLICT accepts either a named constraint or a key list
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH '...' [OVERWRITE] INTO TABLE ...;
        any other LOAD statement falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>); None when there's no PARTITION token."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (delegates to _parse_expressions)."""
        return self._parse_expressions()
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause; None when there's no WITH token."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate an extra WITH after the separator between CTEs
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
"MATERIALIZED"): 2585 materialized = False 2586 elif self._match_text_seq("MATERIALIZED"): 2587 materialized = True 2588 else: 2589 materialized = None 2590 2591 return self.expression( 2592 exp.CTE, 2593 this=self._parse_wrapped(self._parse_statement), 2594 alias=alias, 2595 materialized=materialized, 2596 ) 2597 2598 def _parse_table_alias( 2599 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2600 ) -> t.Optional[exp.TableAlias]: 2601 any_token = self._match(TokenType.ALIAS) 2602 alias = ( 2603 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2604 or self._parse_string_as_identifier() 2605 ) 2606 2607 index = self._index 2608 if self._match(TokenType.L_PAREN): 2609 columns = self._parse_csv(self._parse_function_parameter) 2610 self._match_r_paren() if columns else self._retreat(index) 2611 else: 2612 columns = None 2613 2614 if not alias and not columns: 2615 return None 2616 2617 return self.expression(exp.TableAlias, this=alias, columns=columns) 2618 2619 def _parse_subquery( 2620 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2621 ) -> t.Optional[exp.Subquery]: 2622 if not this: 2623 return None 2624 2625 return self.expression( 2626 exp.Subquery, 2627 this=this, 2628 pivots=self._parse_pivots(), 2629 alias=self._parse_table_alias() if parse_alias else None, 2630 ) 2631 2632 def _implicit_unnests_to_explicit(self, this: E) -> E: 2633 from sqlglot.optimizer.normalize_identifiers import ( 2634 normalize_identifiers as _norm, 2635 ) 2636 2637 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2638 for i, join in enumerate(this.args.get("joins") or []): 2639 table = join.this 2640 normalized_table = table.copy() 2641 normalized_table.meta["maybe_column"] = True 2642 normalized_table = _norm(normalized_table, dialect=self.dialect) 2643 2644 if isinstance(table, exp.Table) and not join.args.get("on"): 2645 if normalized_table.parts[0].name in refs: 2646 
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, and the clauses in
        QUERY_MODIFIER_PARSERS) to `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # Hoist an offset parsed as part of LIMIT into its own
                            # Offset node, moving any LIMIT BY expressions with it
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this
self.raise_error("Expected */ after HINT") 2708 2709 return self.expression(exp.Hint, expressions=hints) 2710 2711 return None 2712 2713 def _parse_into(self) -> t.Optional[exp.Into]: 2714 if not self._match(TokenType.INTO): 2715 return None 2716 2717 temp = self._match(TokenType.TEMPORARY) 2718 unlogged = self._match_text_seq("UNLOGGED") 2719 self._match(TokenType.TABLE) 2720 2721 return self.expression( 2722 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2723 ) 2724 2725 def _parse_from( 2726 self, joins: bool = False, skip_from_token: bool = False 2727 ) -> t.Optional[exp.From]: 2728 if not skip_from_token and not self._match(TokenType.FROM): 2729 return None 2730 2731 return self.expression( 2732 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2733 ) 2734 2735 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2736 if not self._match(TokenType.MATCH_RECOGNIZE): 2737 return None 2738 2739 self._match_l_paren() 2740 2741 partition = self._parse_partition_by() 2742 order = self._parse_order() 2743 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2744 2745 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2746 rows = exp.var("ONE ROW PER MATCH") 2747 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2748 text = "ALL ROWS PER MATCH" 2749 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2750 text += " SHOW EMPTY MATCHES" 2751 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2752 text += " OMIT EMPTY MATCHES" 2753 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2754 text += " WITH UNMATCHED ROWS" 2755 rows = exp.var(text) 2756 else: 2757 rows = None 2758 2759 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2760 text = "AFTER MATCH SKIP" 2761 if self._match_text_seq("PAST", "LAST", "ROW"): 2762 text += " PAST LAST ROW" 2763 elif self._match_text_seq("TO", "NEXT", "ROW"): 2764 text += " TO NEXT ROW" 2765 elif 
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan raw tokens, tracking paren depth, to capture the pattern verbatim
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY. `cross_apply` is True for
        CROSS APPLY, False for OUTER APPLY, and None for plain LATERAL."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: accept UNNEST, a function call, or a bare identifier
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW f(...) tbl AS col1, col2 (Hive-style)
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the optional (method, side, kind) tokens of a JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single join clause (comma join, [method/side/kind] JOIN, or
        OUTER/CROSS APPLY), including its ON / USING condition."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed the modifiers: rewind and forget them
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Nested-join form: `a JOIN b JOIN c ON ...` — try to attach the trailing
            # joins to the right-hand table, rewinding if no ON/USING follows them
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a (Postgres-style) operator class."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameters of CREATE INDEX: USING, columns, INCLUDE,
        PARTITION BY, WITH (...), TABLESPACE and WHERE."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition. When `index` is given, the name was already
        parsed by the caller and only `ON <table>` plus params remain."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL WITH (...) or MySQL USE/FORCE/IGNORE INDEX."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, identifier,
        quoted string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly qualified table name (catalog.db.table, arbitrarily
        deep via Dot nesting), with optional trailing `*` wildcard and pivots."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # Caller wants a database reference: shift the parts left by one
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, UNNEST, VALUES, subquery, or a (possibly
        bracketed) table name, then attach version, alias, hints, pivots, sample,
        joins, and ordinality in the dialect-appropriate order."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects put TABLESAMPLE before the alias, others after
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())
        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it was parsed for
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table versioning: FOR TIMESTAMP/VERSION with
        FROM..TO / BETWEEN..AND / CONTAINED IN / ALL / AS OF forms."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional WITH ORDINALITY / WITH OFFSET and alias."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # e.g. BigQuery: the alias names the produced column, not the table
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")
                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias actually names the ordinality column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES (...) list, optionally wrapped in parens as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse TABLESAMPLE (or DuckDB's USING SAMPLE when `as_modifier`), covering
        percent/rows/size forms, Hive BUCKET x OUT OF y, and SEED/REPEATABLE."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            # NOTE(review): duplicated assignment target below — behaviorally harmless
            # (both names are the same), but likely a typo for a single assignment
            bucket_denominator = bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Collect consecutive PIVOT/UNPIVOT clauses, or None if there are none."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Yield join clauses until one fails to parse."""
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement: table, ON, USING, GROUP BY."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `FOR col IN (v1 [AS a1], ...)` part of a PIVOT clause."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a trailing PIVOT/UNPIVOT clause: aggregations (or columns for
        UNPIVOT), FOR ... IN (...), optional alias, and derived column names."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause — rewind past PIVOT/UNPIVOT
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))
            # Synthesize the output column names from aggregation aliases and
            # the IN-list values, per the dialect's prefix/suffix convention
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (may be empty strings)."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a (ClickHouse-style) PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS, accumulating each element kind into `elements`."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            # WITH ROLLUP / WITH CUBE store True; the wrapped form stores the columns
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # Dangling WITH belonged to something else — rewind it
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS ( ... )."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style hierarchical queries: START WITH ... CONNECT BY
        [NOCYCLE] ..., in either clause order."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a function inside CONNECT BY, so register it temporarily
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name AS expr` (alias first), as used by DEFINE and INTERPOLATE."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a (ClickHouse-style) INTERPOLATE (...) list after WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY) and wrap `this` in an Order node."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term with ASC/DESC, NULLS FIRST/LAST and WITH FILL,
        applying the dialect's default null ordering when none is explicit."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # NOTE(review): `(asc and False)` is always falsy, so desc reduces to the
        # DESC match alone; kept as-is since it still consumes an explicit ASC token
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's implicit null ordering when the query didn't specify one
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT / TOP (when `top`) / FETCH FIRST|NEXT forms. For
        `LIMIT offset, count` the offset is stored on the Limit node itself."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style LIMIT offset, count
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"
            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS] [BY ...]."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the ClickHouse-style trailing BY expression list of LIMIT/OFFSET."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each with optional OF <tables> and NOWAIT / WAIT <n> / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operands onto `this`, then (for
        dialects with MODIFIERS_ATTACHED_TO_UNION) hoist the last operand's
        modifiers up to the set-operation node."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # absent ALL implies DISTINCT
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level expressions (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level expressions (<, >, <=, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ... via RANGE_PARSERS)
        plus ISNULL/NOTNULL shorthands and a trailing IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of IS: [NOT] DISTINCT FROM, NULL, or a boolean.
        Rewinds (including the IS token) when nothing valid follows."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a (sub)query or value
        list in parens/brackets, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
else: 3821 this = self.expression(exp.In, this=this, field=self._parse_field()) 3822 3823 return this 3824 3825 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3826 low = self._parse_bitwise() 3827 self._match(TokenType.AND) 3828 high = self._parse_bitwise() 3829 return self.expression(exp.Between, this=this, low=low, high=high) 3830 3831 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3832 if not self._match(TokenType.ESCAPE): 3833 return this 3834 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3835 3836 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3837 index = self._index 3838 3839 if not self._match(TokenType.INTERVAL) and match_interval: 3840 return None 3841 3842 if self._match(TokenType.STRING, advance=False): 3843 this = self._parse_primary() 3844 else: 3845 this = self._parse_term() 3846 3847 if not this or ( 3848 isinstance(this, exp.Column) 3849 and not this.table 3850 and not this.this.quoted 3851 and this.name.upper() == "IS" 3852 ): 3853 self._retreat(index) 3854 return None 3855 3856 unit = self._parse_function() or ( 3857 not self._match(TokenType.ALIAS, advance=False) 3858 and self._parse_var(any_token=True, upper=True) 3859 ) 3860 3861 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3862 # each INTERVAL expression into this canonical form so it's easy to transpile 3863 if this and this.is_number: 3864 this = exp.Literal.string(this.name) 3865 elif this and this.is_string: 3866 parts = this.name.split() 3867 3868 if len(parts) == 2: 3869 if unit: 3870 # This is not actually a unit, it's something else (e.g. 
a "window side") 3871 unit = None 3872 self._retreat(self._index - 1) 3873 3874 this = exp.Literal.string(parts[0]) 3875 unit = self.expression(exp.Var, this=parts[1].upper()) 3876 3877 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3878 unit = self.expression( 3879 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3880 ) 3881 3882 return self.expression(exp.Interval, this=this, unit=unit) 3883 3884 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3885 this = self._parse_term() 3886 3887 while True: 3888 if self._match_set(self.BITWISE): 3889 this = self.expression( 3890 self.BITWISE[self._prev.token_type], 3891 this=this, 3892 expression=self._parse_term(), 3893 ) 3894 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3895 this = self.expression( 3896 exp.DPipe, 3897 this=this, 3898 expression=self._parse_term(), 3899 safe=not self.dialect.STRICT_STRING_CONCAT, 3900 ) 3901 elif self._match(TokenType.DQMARK): 3902 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3903 elif self._match_pair(TokenType.LT, TokenType.LT): 3904 this = self.expression( 3905 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3906 ) 3907 elif self._match_pair(TokenType.GT, TokenType.GT): 3908 this = self.expression( 3909 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3910 ) 3911 else: 3912 break 3913 3914 return this 3915 3916 def _parse_term(self) -> t.Optional[exp.Expression]: 3917 return self._parse_tokens(self._parse_factor, self.TERM) 3918 3919 def _parse_factor(self) -> t.Optional[exp.Expression]: 3920 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3921 this = parse_method() 3922 3923 while self._match_set(self.FACTOR): 3924 this = self.expression( 3925 self.FACTOR[self._prev.token_type], 3926 this=this, 3927 comments=self._prev_comments, 3928 expression=parse_method(), 3929 ) 3930 if isinstance(this, exp.Div): 3931 
this.args["typed"] = self.dialect.TYPED_DIVISION 3932 this.args["safe"] = self.dialect.SAFE_DIVISION 3933 3934 return this 3935 3936 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3937 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3938 3939 def _parse_unary(self) -> t.Optional[exp.Expression]: 3940 if self._match_set(self.UNARY_PARSERS): 3941 return self.UNARY_PARSERS[self._prev.token_type](self) 3942 return self._parse_at_time_zone(self._parse_type()) 3943 3944 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3945 interval = parse_interval and self._parse_interval() 3946 if interval: 3947 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3948 while True: 3949 index = self._index 3950 self._match(TokenType.PLUS) 3951 3952 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3953 self._retreat(index) 3954 break 3955 3956 interval = self.expression( # type: ignore 3957 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3958 ) 3959 3960 return interval 3961 3962 index = self._index 3963 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3964 this = self._parse_column() 3965 3966 if data_type: 3967 if isinstance(this, exp.Literal): 3968 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3969 if parser: 3970 return parser(self, this, data_type) 3971 return self.expression(exp.Cast, this=this, to=data_type) 3972 if not data_type.expressions: 3973 self._retreat(index) 3974 return self._parse_column() 3975 return self._parse_column_ops(data_type) 3976 3977 return this and self._parse_column_ops(this) 3978 3979 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3980 this = self._parse_type() 3981 if not this: 3982 return None 3983 3984 if isinstance(this, exp.Column) and not this.table: 3985 this = exp.var(this.name.upper()) 3986 3987 return self.expression( 3988 exp.DataTypeParam, this=this, 
expression=self._parse_var(any_token=True) 3989 ) 3990 3991 def _parse_types( 3992 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3993 ) -> t.Optional[exp.Expression]: 3994 index = self._index 3995 3996 prefix = self._match_text_seq("SYSUDTLIB", ".") 3997 3998 if not self._match_set(self.TYPE_TOKENS): 3999 identifier = allow_identifiers and self._parse_id_var( 4000 any_token=False, tokens=(TokenType.VAR,) 4001 ) 4002 if identifier: 4003 tokens = self.dialect.tokenize(identifier.name) 4004 4005 if len(tokens) != 1: 4006 self.raise_error("Unexpected identifier", self._prev) 4007 4008 if tokens[0].token_type in self.TYPE_TOKENS: 4009 self._prev = tokens[0] 4010 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4011 type_name = identifier.name 4012 4013 while self._match(TokenType.DOT): 4014 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4015 4016 return exp.DataType.build(type_name, udt=True) 4017 else: 4018 self._retreat(self._index - 1) 4019 return None 4020 else: 4021 return None 4022 4023 type_token = self._prev.token_type 4024 4025 if type_token == TokenType.PSEUDO_TYPE: 4026 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4027 4028 if type_token == TokenType.OBJECT_IDENTIFIER: 4029 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4030 4031 nested = type_token in self.NESTED_TYPE_TOKENS 4032 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4033 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4034 expressions = None 4035 maybe_func = False 4036 4037 if self._match(TokenType.L_PAREN): 4038 if is_struct: 4039 expressions = self._parse_csv(self._parse_struct_types) 4040 elif nested: 4041 expressions = self._parse_csv( 4042 lambda: self._parse_types( 4043 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4044 ) 4045 ) 4046 elif type_token in self.ENUM_TYPE_TOKENS: 4047 expressions = self._parse_csv(self._parse_equality) 4048 elif 
is_aggregate: 4049 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4050 any_token=False, tokens=(TokenType.VAR,) 4051 ) 4052 if not func_or_ident or not self._match(TokenType.COMMA): 4053 return None 4054 expressions = self._parse_csv( 4055 lambda: self._parse_types( 4056 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4057 ) 4058 ) 4059 expressions.insert(0, func_or_ident) 4060 else: 4061 expressions = self._parse_csv(self._parse_type_size) 4062 4063 if not expressions or not self._match(TokenType.R_PAREN): 4064 self._retreat(index) 4065 return None 4066 4067 maybe_func = True 4068 4069 this: t.Optional[exp.Expression] = None 4070 values: t.Optional[t.List[exp.Expression]] = None 4071 4072 if nested and self._match(TokenType.LT): 4073 if is_struct: 4074 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4075 else: 4076 expressions = self._parse_csv( 4077 lambda: self._parse_types( 4078 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4079 ) 4080 ) 4081 4082 if not self._match(TokenType.GT): 4083 self.raise_error("Expecting >") 4084 4085 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4086 values = self._parse_csv(self._parse_conjunction) 4087 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4088 4089 if type_token in self.TIMESTAMPS: 4090 if self._match_text_seq("WITH", "TIME", "ZONE"): 4091 maybe_func = False 4092 tz_type = ( 4093 exp.DataType.Type.TIMETZ 4094 if type_token in self.TIMES 4095 else exp.DataType.Type.TIMESTAMPTZ 4096 ) 4097 this = exp.DataType(this=tz_type, expressions=expressions) 4098 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4099 maybe_func = False 4100 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4101 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4102 maybe_func = False 4103 elif type_token == TokenType.INTERVAL: 4104 unit = 
self._parse_var(upper=True) 4105 if unit: 4106 if self._match_text_seq("TO"): 4107 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4108 4109 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4110 else: 4111 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4112 4113 if maybe_func and check_func: 4114 index2 = self._index 4115 peek = self._parse_string() 4116 4117 if not peek: 4118 self._retreat(index) 4119 return None 4120 4121 self._retreat(index2) 4122 4123 if not this: 4124 if self._match_text_seq("UNSIGNED"): 4125 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4126 if not unsigned_type_token: 4127 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4128 4129 type_token = unsigned_type_token or type_token 4130 4131 this = exp.DataType( 4132 this=exp.DataType.Type[type_token.value], 4133 expressions=expressions, 4134 nested=nested, 4135 values=values, 4136 prefix=prefix, 4137 ) 4138 4139 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4140 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4141 4142 return this 4143 4144 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4145 index = self._index 4146 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4147 self._match(TokenType.COLON) 4148 column_def = self._parse_column_def(this) 4149 4150 if type_required and ( 4151 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4152 ): 4153 self._retreat(index) 4154 return self._parse_types() 4155 4156 return column_def 4157 4158 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4159 if not self._match_text_seq("AT", "TIME", "ZONE"): 4160 return this 4161 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4162 4163 def _parse_column(self) -> 
t.Optional[exp.Expression]: 4164 this = self._parse_column_reference() 4165 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4166 4167 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4168 this = self._parse_field() 4169 if ( 4170 not this 4171 and self._match(TokenType.VALUES, advance=False) 4172 and self.VALUES_FOLLOWED_BY_PAREN 4173 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4174 ): 4175 this = self._parse_id_var() 4176 4177 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4178 4179 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4180 this = self._parse_bracket(this) 4181 4182 while self._match_set(self.COLUMN_OPERATORS): 4183 op_token = self._prev.token_type 4184 op = self.COLUMN_OPERATORS.get(op_token) 4185 4186 if op_token == TokenType.DCOLON: 4187 field = self._parse_types() 4188 if not field: 4189 self.raise_error("Expected type") 4190 elif op and self._curr: 4191 field = self._parse_column_reference() 4192 else: 4193 field = self._parse_field(anonymous_func=True, any_token=True) 4194 4195 if isinstance(field, exp.Func) and this: 4196 # bigquery allows function calls like x.y.count(...) 4197 # SAFE.SUBSTR(...) 
4198 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4199 this = exp.replace_tree( 4200 this, 4201 lambda n: ( 4202 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4203 if n.table 4204 else n.this 4205 ) 4206 if isinstance(n, exp.Column) 4207 else n, 4208 ) 4209 4210 if op: 4211 this = op(self, this, field) 4212 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4213 this = self.expression( 4214 exp.Column, 4215 this=field, 4216 table=this.this, 4217 db=this.args.get("table"), 4218 catalog=this.args.get("db"), 4219 ) 4220 else: 4221 this = self.expression(exp.Dot, this=this, expression=field) 4222 this = self._parse_bracket(this) 4223 return this 4224 4225 def _parse_primary(self) -> t.Optional[exp.Expression]: 4226 if self._match_set(self.PRIMARY_PARSERS): 4227 token_type = self._prev.token_type 4228 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4229 4230 if token_type == TokenType.STRING: 4231 expressions = [primary] 4232 while self._match(TokenType.STRING): 4233 expressions.append(exp.Literal.string(self._prev.text)) 4234 4235 if len(expressions) > 1: 4236 return self.expression(exp.Concat, expressions=expressions) 4237 4238 return primary 4239 4240 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4241 return exp.Literal.number(f"0.{self._prev.text}") 4242 4243 if self._match(TokenType.L_PAREN): 4244 comments = self._prev_comments 4245 query = self._parse_select() 4246 4247 if query: 4248 expressions = [query] 4249 else: 4250 expressions = self._parse_expressions() 4251 4252 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4253 4254 if isinstance(this, exp.UNWRAPPED_QUERIES): 4255 this = self._parse_set_operations( 4256 self._parse_subquery(this=this, parse_alias=False) 4257 ) 4258 elif isinstance(this, exp.Subquery): 4259 this = self._parse_subquery( 4260 this=self._parse_set_operations(this), parse_alias=False 4261 ) 4262 elif 
len(expressions) > 1: 4263 this = self.expression(exp.Tuple, expressions=expressions) 4264 else: 4265 this = self.expression(exp.Paren, this=this) 4266 4267 if this: 4268 this.add_comments(comments) 4269 4270 self._match_r_paren(expression=this) 4271 return this 4272 4273 return None 4274 4275 def _parse_field( 4276 self, 4277 any_token: bool = False, 4278 tokens: t.Optional[t.Collection[TokenType]] = None, 4279 anonymous_func: bool = False, 4280 ) -> t.Optional[exp.Expression]: 4281 return ( 4282 self._parse_primary() 4283 or self._parse_function(anonymous=anonymous_func) 4284 or self._parse_id_var(any_token=any_token, tokens=tokens) 4285 ) 4286 4287 def _parse_function( 4288 self, 4289 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4290 anonymous: bool = False, 4291 optional_parens: bool = True, 4292 ) -> t.Optional[exp.Expression]: 4293 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4294 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4295 fn_syntax = False 4296 if ( 4297 self._match(TokenType.L_BRACE, advance=False) 4298 and self._next 4299 and self._next.text.upper() == "FN" 4300 ): 4301 self._advance(2) 4302 fn_syntax = True 4303 4304 func = self._parse_function_call( 4305 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4306 ) 4307 4308 if fn_syntax: 4309 self._match(TokenType.R_BRACE) 4310 4311 return func 4312 4313 def _parse_function_call( 4314 self, 4315 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4316 anonymous: bool = False, 4317 optional_parens: bool = True, 4318 ) -> t.Optional[exp.Expression]: 4319 if not self._curr: 4320 return None 4321 4322 comments = self._curr.comments 4323 token_type = self._curr.token_type 4324 this = self._curr.text 4325 upper = this.upper() 4326 4327 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4328 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4329 self._advance() 4330 return 
self._parse_window(parser(self)) 4331 4332 if not self._next or self._next.token_type != TokenType.L_PAREN: 4333 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4334 self._advance() 4335 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4336 4337 return None 4338 4339 if token_type not in self.FUNC_TOKENS: 4340 return None 4341 4342 self._advance(2) 4343 4344 parser = self.FUNCTION_PARSERS.get(upper) 4345 if parser and not anonymous: 4346 this = parser(self) 4347 else: 4348 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4349 4350 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4351 this = self.expression(subquery_predicate, this=self._parse_select()) 4352 self._match_r_paren() 4353 return this 4354 4355 if functions is None: 4356 functions = self.FUNCTIONS 4357 4358 function = functions.get(upper) 4359 4360 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4361 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4362 4363 if alias: 4364 args = self._kv_to_prop_eq(args) 4365 4366 if function and not anonymous: 4367 if "dialect" in function.__code__.co_varnames: 4368 func = function(args, dialect=self.dialect) 4369 else: 4370 func = function(args) 4371 4372 func = self.validate_expression(func, args) 4373 if not self.dialect.NORMALIZE_FUNCTIONS: 4374 func.meta["name"] = this 4375 4376 this = func 4377 else: 4378 if token_type == TokenType.IDENTIFIER: 4379 this = exp.Identifier(this=this, quoted=True) 4380 this = self.expression(exp.Anonymous, this=this, expressions=args) 4381 4382 if isinstance(this, exp.Expression): 4383 this.add_comments(comments) 4384 4385 self._match_r_paren(this) 4386 return self._parse_window(this) 4387 4388 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4389 transformed = [] 4390 4391 for e in expressions: 4392 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4393 if isinstance(e, exp.Alias): 4394 e = 
self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4395 4396 if not isinstance(e, exp.PropertyEQ): 4397 e = self.expression( 4398 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4399 ) 4400 4401 if isinstance(e.this, exp.Column): 4402 e.this.replace(e.this.this) 4403 4404 transformed.append(e) 4405 4406 return transformed 4407 4408 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4409 return self._parse_column_def(self._parse_id_var()) 4410 4411 def _parse_user_defined_function( 4412 self, kind: t.Optional[TokenType] = None 4413 ) -> t.Optional[exp.Expression]: 4414 this = self._parse_id_var() 4415 4416 while self._match(TokenType.DOT): 4417 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4418 4419 if not self._match(TokenType.L_PAREN): 4420 return this 4421 4422 expressions = self._parse_csv(self._parse_function_parameter) 4423 self._match_r_paren() 4424 return self.expression( 4425 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4426 ) 4427 4428 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4429 literal = self._parse_primary() 4430 if literal: 4431 return self.expression(exp.Introducer, this=token.text, expression=literal) 4432 4433 return self.expression(exp.Identifier, this=token.text) 4434 4435 def _parse_session_parameter(self) -> exp.SessionParameter: 4436 kind = None 4437 this = self._parse_id_var() or self._parse_primary() 4438 4439 if this and self._match(TokenType.DOT): 4440 kind = this.name 4441 this = self._parse_var() or self._parse_primary() 4442 4443 return self.expression(exp.SessionParameter, this=this, kind=kind) 4444 4445 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4446 index = self._index 4447 4448 if self._match(TokenType.L_PAREN): 4449 expressions = t.cast( 4450 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4451 ) 4452 4453 if 
not self._match(TokenType.R_PAREN): 4454 self._retreat(index) 4455 else: 4456 expressions = [self._parse_id_var()] 4457 4458 if self._match_set(self.LAMBDAS): 4459 return self.LAMBDAS[self._prev.token_type](self, expressions) 4460 4461 self._retreat(index) 4462 4463 this: t.Optional[exp.Expression] 4464 4465 if self._match(TokenType.DISTINCT): 4466 this = self.expression( 4467 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4468 ) 4469 else: 4470 this = self._parse_select_or_expression(alias=alias) 4471 4472 return self._parse_limit( 4473 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4474 ) 4475 4476 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4477 index = self._index 4478 4479 if not self._match(TokenType.L_PAREN): 4480 return this 4481 4482 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4483 # expr can be of both types 4484 if self._match_set(self.SELECT_START_TOKENS): 4485 self._retreat(index) 4486 return this 4487 4488 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4489 4490 self._match_r_paren() 4491 return self.expression(exp.Schema, this=this, expressions=args) 4492 4493 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4494 return self._parse_column_def(self._parse_field(any_token=True)) 4495 4496 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4497 # column defs are not really columns, they're identifiers 4498 if isinstance(this, exp.Column): 4499 this = this.this 4500 4501 kind = self._parse_types(schema=True) 4502 4503 if self._match_text_seq("FOR", "ORDINALITY"): 4504 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4505 4506 constraints: t.List[exp.Expression] = [] 4507 4508 if (not kind and self._match(TokenType.ALIAS)) or self._match_text_seq("ALIAS"): 4509 constraints.append( 4510 self.expression( 
4511 exp.ComputedColumnConstraint, 4512 this=self._parse_conjunction(), 4513 persisted=self._match_text_seq("PERSISTED"), 4514 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4515 ) 4516 ) 4517 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4518 self._match(TokenType.ALIAS) 4519 constraints.append( 4520 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4521 ) 4522 4523 while True: 4524 constraint = self._parse_column_constraint() 4525 if not constraint: 4526 break 4527 constraints.append(constraint) 4528 4529 if not kind and not constraints: 4530 return this 4531 4532 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4533 4534 def _parse_auto_increment( 4535 self, 4536 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4537 start = None 4538 increment = None 4539 4540 if self._match(TokenType.L_PAREN, advance=False): 4541 args = self._parse_wrapped_csv(self._parse_bitwise) 4542 start = seq_get(args, 0) 4543 increment = seq_get(args, 1) 4544 elif self._match_text_seq("START"): 4545 start = self._parse_bitwise() 4546 self._match_text_seq("INCREMENT") 4547 increment = self._parse_bitwise() 4548 4549 if start and increment: 4550 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4551 4552 return exp.AutoIncrementColumnConstraint() 4553 4554 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4555 if not self._match_text_seq("REFRESH"): 4556 self._retreat(self._index - 1) 4557 return None 4558 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4559 4560 def _parse_compress(self) -> exp.CompressColumnConstraint: 4561 if self._match(TokenType.L_PAREN, advance=False): 4562 return self.expression( 4563 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4564 ) 4565 4566 return self.expression(exp.CompressColumnConstraint, 
this=self._parse_bitwise()) 4567 4568 def _parse_generated_as_identity( 4569 self, 4570 ) -> ( 4571 exp.GeneratedAsIdentityColumnConstraint 4572 | exp.ComputedColumnConstraint 4573 | exp.GeneratedAsRowColumnConstraint 4574 ): 4575 if self._match_text_seq("BY", "DEFAULT"): 4576 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4577 this = self.expression( 4578 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4579 ) 4580 else: 4581 self._match_text_seq("ALWAYS") 4582 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4583 4584 self._match(TokenType.ALIAS) 4585 4586 if self._match_text_seq("ROW"): 4587 start = self._match_text_seq("START") 4588 if not start: 4589 self._match(TokenType.END) 4590 hidden = self._match_text_seq("HIDDEN") 4591 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4592 4593 identity = self._match_text_seq("IDENTITY") 4594 4595 if self._match(TokenType.L_PAREN): 4596 if self._match(TokenType.START_WITH): 4597 this.set("start", self._parse_bitwise()) 4598 if self._match_text_seq("INCREMENT", "BY"): 4599 this.set("increment", self._parse_bitwise()) 4600 if self._match_text_seq("MINVALUE"): 4601 this.set("minvalue", self._parse_bitwise()) 4602 if self._match_text_seq("MAXVALUE"): 4603 this.set("maxvalue", self._parse_bitwise()) 4604 4605 if self._match_text_seq("CYCLE"): 4606 this.set("cycle", True) 4607 elif self._match_text_seq("NO", "CYCLE"): 4608 this.set("cycle", False) 4609 4610 if not identity: 4611 this.set("expression", self._parse_bitwise()) 4612 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4613 args = self._parse_csv(self._parse_bitwise) 4614 this.set("start", seq_get(args, 0)) 4615 this.set("increment", seq_get(args, 1)) 4616 4617 self._match_r_paren() 4618 4619 return this 4620 4621 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4622 self._match_text_seq("LENGTH") 4623 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4624 4625 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4626 if self._match_text_seq("NULL"): 4627 return self.expression(exp.NotNullColumnConstraint) 4628 if self._match_text_seq("CASESPECIFIC"): 4629 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4630 if self._match_text_seq("FOR", "REPLICATION"): 4631 return self.expression(exp.NotForReplicationColumnConstraint) 4632 return None 4633 4634 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4635 if self._match(TokenType.CONSTRAINT): 4636 this = self._parse_id_var() 4637 else: 4638 this = None 4639 4640 if self._match_texts(self.CONSTRAINT_PARSERS): 4641 return self.expression( 4642 exp.ColumnConstraint, 4643 this=this, 4644 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4645 ) 4646 4647 return this 4648 4649 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4650 if not self._match(TokenType.CONSTRAINT): 4651 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4652 4653 return self.expression( 4654 exp.Constraint, 4655 this=self._parse_id_var(), 4656 expressions=self._parse_unnamed_constraints(), 4657 ) 4658 4659 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4660 constraints = [] 4661 while True: 4662 constraint = self._parse_unnamed_constraint() or self._parse_function() 4663 if not constraint: 4664 break 4665 constraints.append(constraint) 4666 4667 return constraints 4668 4669 def _parse_unnamed_constraint( 4670 self, constraints: t.Optional[t.Collection[str]] = None 4671 ) -> t.Optional[exp.Expression]: 4672 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4673 constraints or self.CONSTRAINT_PARSERS 4674 ): 4675 return None 4676 4677 constraint = self._prev.text.upper() 4678 if constraint not in self.CONSTRAINT_PARSERS: 4679 self.raise_error(f"No parser found for schema constraint 
{constraint}.") 4680 4681 return self.CONSTRAINT_PARSERS[constraint](self) 4682 4683 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4684 self._match_text_seq("KEY") 4685 return self.expression( 4686 exp.UniqueColumnConstraint, 4687 this=self._parse_schema(self._parse_id_var(any_token=False)), 4688 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4689 on_conflict=self._parse_on_conflict(), 4690 ) 4691 4692 def _parse_key_constraint_options(self) -> t.List[str]: 4693 options = [] 4694 while True: 4695 if not self._curr: 4696 break 4697 4698 if self._match(TokenType.ON): 4699 action = None 4700 on = self._advance_any() and self._prev.text 4701 4702 if self._match_text_seq("NO", "ACTION"): 4703 action = "NO ACTION" 4704 elif self._match_text_seq("CASCADE"): 4705 action = "CASCADE" 4706 elif self._match_text_seq("RESTRICT"): 4707 action = "RESTRICT" 4708 elif self._match_pair(TokenType.SET, TokenType.NULL): 4709 action = "SET NULL" 4710 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4711 action = "SET DEFAULT" 4712 else: 4713 self.raise_error("Invalid key constraint") 4714 4715 options.append(f"ON {on} {action}") 4716 elif self._match_text_seq("NOT", "ENFORCED"): 4717 options.append("NOT ENFORCED") 4718 elif self._match_text_seq("DEFERRABLE"): 4719 options.append("DEFERRABLE") 4720 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4721 options.append("INITIALLY DEFERRED") 4722 elif self._match_text_seq("NORELY"): 4723 options.append("NORELY") 4724 elif self._match_text_seq("MATCH", "FULL"): 4725 options.append("MATCH FULL") 4726 else: 4727 break 4728 4729 return options 4730 4731 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4732 if match and not self._match(TokenType.REFERENCES): 4733 return None 4734 4735 expressions = None 4736 this = self._parse_table(schema=True) 4737 options = self._parse_key_constraint_options() 4738 return self.expression(exp.Reference, this=this, 
expressions=expressions, options=options) 4739 4740 def _parse_foreign_key(self) -> exp.ForeignKey: 4741 expressions = self._parse_wrapped_id_vars() 4742 reference = self._parse_references() 4743 options = {} 4744 4745 while self._match(TokenType.ON): 4746 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4747 self.raise_error("Expected DELETE or UPDATE") 4748 4749 kind = self._prev.text.lower() 4750 4751 if self._match_text_seq("NO", "ACTION"): 4752 action = "NO ACTION" 4753 elif self._match(TokenType.SET): 4754 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4755 action = "SET " + self._prev.text.upper() 4756 else: 4757 self._advance() 4758 action = self._prev.text.upper() 4759 4760 options[kind] = action 4761 4762 return self.expression( 4763 exp.ForeignKey, 4764 expressions=expressions, 4765 reference=reference, 4766 **options, # type: ignore 4767 ) 4768 4769 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4770 return self._parse_field() 4771 4772 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4773 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4774 self._retreat(self._index - 1) 4775 return None 4776 4777 id_vars = self._parse_wrapped_id_vars() 4778 return self.expression( 4779 exp.PeriodForSystemTimeConstraint, 4780 this=seq_get(id_vars, 0), 4781 expression=seq_get(id_vars, 1), 4782 ) 4783 4784 def _parse_primary_key( 4785 self, wrapped_optional: bool = False, in_props: bool = False 4786 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4787 desc = ( 4788 self._match_set((TokenType.ASC, TokenType.DESC)) 4789 and self._prev.token_type == TokenType.DESC 4790 ) 4791 4792 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4793 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4794 4795 expressions = self._parse_wrapped_csv( 4796 self._parse_primary_key_part, optional=wrapped_optional 4797 ) 4798 options = self._parse_key_constraint_options() 
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one bracket/brace element: an expression with optional alias and slice suffix."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a trailing [...] or {...} after `this`, recursing to support chained subscripts."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: adjust literal indices by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Recurse for chained brackets, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `: <expr>` slice suffix."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple CASE" form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # If the ELSE branch parsed as an interval whose operand is the END keyword,
            # the ELSE expression was really a column named "interval"; undo the misparse.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in function form `IF(...)` or statement form `IF ... THEN ... [ELSE ...] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # At statement start, some dialects treat a bare IF as an opaque command.
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)] (NEXT is consumed by the caller)."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Undo the token consumed before delegating here so it can be reparsed.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>); some dialects separate with a comma instead."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(expr AS type [FORMAT fmt]); strict selects Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: CAST(expr, 'type string').
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A FORMAT cast to a temporal type is really a string-to-date/time conversion.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments (caller already consumed the opening paren)."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: `expr USING charset` or `expr, type`."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: equal, or both operands NULL (DECODE treats NULL = NULL as a match).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a `[KEY] <key> <sep> [VALUE] <value>` pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when followed by `FORMAT JSON`."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments and modifiers (NULL handling, KEYS, RETURNING, ENCODING)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(...) schema for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [error/empty handling] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH (cols) AGAINST ('query' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type ['path'] [AS JSON]` entry from the WITH (...) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style args; `haystack_first` flips the comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(substr IN string) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <table>, TABLE <table> [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional LEADING/TRAILING/BOTH prefix.
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or TRIM_PATTERN_FIRST dialects) the first operand is the
            # characters to trim and the second the target string — swap them.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a trailing WINDOW clause as a list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX|MIN <column>` qualifier."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes on `this`: FILTER, WITHIN GROUP, nulls handling, OVER (...)."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the nulls-handling wrapper to the outside of the aggregate.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER (or equivalent) keyword: not a window expression after all.
            return this
        else:
            over = self._prev.text.upper()

        if comments:
            func.comments = None  # type: ignore

        if not self._match(TokenType.L_PAREN):
            # "OVER <name>" form: reference to a named window, no inline spec.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # Oracle FIRST/LAST within a windowed aggregate.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE frame: BETWEEN <spec> AND <spec>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`; with `explicit`, only accept an AS-prefixed alias."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, ...).
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name, falling back to any token (or `tokens`) when allowed."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            # String tokens used as identifiers are considered quoted.
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier, or None if no string follows."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (and not ignored)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped with a `name:part` suffix."""
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Parser declined: give back the token we consumed.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT-star EXCEPT column list: wrapped CSV or a single bare column."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT-star REPLACE expression list: wrapped CSV or a single bare expression."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a separator-delimited list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comment sitting on the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` (token -> node type) over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; `optional` allows the parens to be absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS <select>)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens joined by spaces.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            # NOTE(review): a parsed AND [NO] CHAIN is intentionally not attached to Rollback.
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST|AFTER col] action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action inside ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... ADD: either constraints or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # Single ADD followed by a (wrapped) list of field defs.
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] <col> DROP/SET DEFAULT, COMMENT, or [SET DATA] TYPE <type>."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME COLUMN old TO new, or RENAME TO <table>."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE, falling back to an opaque Command when unsupported."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # The keyword after the table name selects the action parser (ADD, DROP, ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable if all input was consumed; otherwise fall back.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <condition> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN <action> clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, False when neither is given.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var,
this=self._prev.text) 5862 else: 5863 then = None 5864 5865 whens.append( 5866 self.expression( 5867 exp.When, 5868 matched=matched, 5869 source=source, 5870 condition=condition, 5871 then=then, 5872 ) 5873 ) 5874 return whens 5875 5876 def _parse_show(self) -> t.Optional[exp.Expression]: 5877 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5878 if parser: 5879 return parser(self) 5880 return self._parse_as_command(self._prev) 5881 5882 def _parse_set_item_assignment( 5883 self, kind: t.Optional[str] = None 5884 ) -> t.Optional[exp.Expression]: 5885 index = self._index 5886 5887 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5888 return self._parse_set_transaction(global_=kind == "GLOBAL") 5889 5890 left = self._parse_primary() or self._parse_id_var() 5891 assignment_delimiter = self._match_texts(("=", "TO")) 5892 5893 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5894 self._retreat(index) 5895 return None 5896 5897 right = self._parse_statement() or self._parse_id_var() 5898 this = self.expression(exp.EQ, this=left, expression=right) 5899 5900 return self.expression(exp.SetItem, this=this, kind=kind) 5901 5902 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5903 self._match_text_seq("TRANSACTION") 5904 characteristics = self._parse_csv( 5905 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5906 ) 5907 return self.expression( 5908 exp.SetItem, 5909 expressions=characteristics, 5910 kind="TRANSACTION", 5911 **{"global": global_}, # type: ignore 5912 ) 5913 5914 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5915 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5916 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5917 5918 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5919 index = self._index 5920 set_ = self.expression( 5921 exp.Set, 
expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5922 ) 5923 5924 if self._curr: 5925 self._retreat(index) 5926 return self._parse_as_command(self._prev) 5927 5928 return set_ 5929 5930 def _parse_var_from_options( 5931 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5932 ) -> t.Optional[exp.Var]: 5933 start = self._curr 5934 if not start: 5935 return None 5936 5937 option = start.text.upper() 5938 continuations = options.get(option) 5939 5940 index = self._index 5941 self._advance() 5942 for keywords in continuations or []: 5943 if isinstance(keywords, str): 5944 keywords = (keywords,) 5945 5946 if self._match_text_seq(*keywords): 5947 option = f"{option} {' '.join(keywords)}" 5948 break 5949 else: 5950 if continuations or continuations is None: 5951 if raise_unmatched: 5952 self.raise_error(f"Unknown option {option}") 5953 5954 self._retreat(index) 5955 return None 5956 5957 return exp.var(option) 5958 5959 def _parse_as_command(self, start: Token) -> exp.Command: 5960 while self._curr: 5961 self._advance() 5962 text = self._find_sql(start, self._prev) 5963 size = len(start.text) 5964 self._warn_unsupported() 5965 return exp.Command(this=text[:size], expression=text[size:]) 5966 5967 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5968 settings = [] 5969 5970 self._match_l_paren() 5971 kind = self._parse_id_var() 5972 5973 if self._match(TokenType.L_PAREN): 5974 while True: 5975 key = self._parse_id_var() 5976 value = self._parse_primary() 5977 5978 if not key and value is None: 5979 break 5980 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5981 self._match(TokenType.R_PAREN) 5982 5983 self._match_r_paren() 5984 5985 return self.expression( 5986 exp.DictProperty, 5987 this=this, 5988 kind=kind.this if kind else None, 5989 settings=settings, 5990 ) 5991 5992 def _parse_dict_range(self, this: str) -> exp.DictRange: 5993 self._match_l_paren() 5994 has_min = self._match_text_seq("MIN") 5995 
if has_min: 5996 min = self._parse_var() or self._parse_primary() 5997 self._match_text_seq("MAX") 5998 max = self._parse_var() or self._parse_primary() 5999 else: 6000 max = self._parse_var() or self._parse_primary() 6001 min = exp.Literal.number(0) 6002 self._match_r_paren() 6003 return self.expression(exp.DictRange, this=this, min=min, max=max) 6004 6005 def _parse_comprehension( 6006 self, this: t.Optional[exp.Expression] 6007 ) -> t.Optional[exp.Comprehension]: 6008 index = self._index 6009 expression = self._parse_column() 6010 if not self._match(TokenType.IN): 6011 self._retreat(index - 1) 6012 return None 6013 iterator = self._parse_column() 6014 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6015 return self.expression( 6016 exp.Comprehension, 6017 this=this, 6018 expression=expression, 6019 iterator=iterator, 6020 condition=condition, 6021 ) 6022 6023 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6024 if self._match(TokenType.HEREDOC_STRING): 6025 return self.expression(exp.Heredoc, this=self._prev.text) 6026 6027 if not self._match_text_seq("$"): 6028 return None 6029 6030 tags = ["$"] 6031 tag_text = None 6032 6033 if self._is_connected(): 6034 self._advance() 6035 tags.append(self._prev.text.upper()) 6036 else: 6037 self.raise_error("No closing $ found") 6038 6039 if tags[-1] != "$": 6040 if self._is_connected() and self._match_text_seq("$"): 6041 tag_text = tags[-1] 6042 tags.append("$") 6043 else: 6044 self.raise_error("No closing $ found") 6045 6046 heredoc_start = self._curr 6047 6048 while self._curr: 6049 if self._match_text_seq(*tags, advance=False): 6050 this = self._find_sql(heredoc_start, self._prev) 6051 self._advance(len(tags)) 6052 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6053 6054 self._advance() 6055 6056 self.raise_error(f"No closing {''.join(tags)} found") 6057 return None 6058 6059 def _find_parser( 6060 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6061 ) -> 
t.Optional[t.Callable]: 6062 if not self._curr: 6063 return None 6064 6065 index = self._index 6066 this = [] 6067 while True: 6068 # The current token might be multiple words 6069 curr = self._curr.text.upper() 6070 key = curr.split(" ") 6071 this.append(curr) 6072 6073 self._advance() 6074 result, trie = in_trie(trie, key) 6075 if result == TrieResult.FAILED: 6076 break 6077 6078 if result == TrieResult.EXISTS: 6079 subparser = parsers[" ".join(this)] 6080 return subparser 6081 6082 self._retreat(index) 6083 return None 6084 6085 def _match(self, token_type, advance=True, expression=None): 6086 if not self._curr: 6087 return None 6088 6089 if self._curr.token_type == token_type: 6090 if advance: 6091 self._advance() 6092 self._add_comments(expression) 6093 return True 6094 6095 return None 6096 6097 def _match_set(self, types, advance=True): 6098 if not self._curr: 6099 return None 6100 6101 if self._curr.token_type in types: 6102 if advance: 6103 self._advance() 6104 return True 6105 6106 return None 6107 6108 def _match_pair(self, token_type_a, token_type_b, advance=True): 6109 if not self._curr or not self._next: 6110 return None 6111 6112 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6113 if advance: 6114 self._advance(2) 6115 return True 6116 6117 return None 6118 6119 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6120 if not self._match(TokenType.L_PAREN, expression=expression): 6121 self.raise_error("Expecting (") 6122 6123 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6124 if not self._match(TokenType.R_PAREN, expression=expression): 6125 self.raise_error("Expecting )") 6126 6127 def _match_texts(self, texts, advance=True): 6128 if self._curr and self._curr.text.upper() in texts: 6129 if advance: 6130 self._advance() 6131 return True 6132 return None 6133 6134 def _match_text_seq(self, *texts, advance=True): 6135 index = self._index 6136 for text in 
texts: 6137 if self._curr and self._curr.text.upper() == text: 6138 self._advance() 6139 else: 6140 self._retreat(index) 6141 return None 6142 6143 if not advance: 6144 self._retreat(index) 6145 6146 return True 6147 6148 def _replace_lambda( 6149 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6150 ) -> t.Optional[exp.Expression]: 6151 if not node: 6152 return node 6153 6154 for column in node.find_all(exp.Column): 6155 if column.parts[0].name in lambda_variables: 6156 dot_or_id = column.to_dot() if column.table else column.this 6157 parent = column.parent 6158 6159 while isinstance(parent, exp.Dot): 6160 if not isinstance(parent.parent, exp.Dot): 6161 parent.replace(dot_or_id) 6162 break 6163 parent = parent.parent 6164 else: 6165 if column is node: 6166 node = dot_or_id 6167 else: 6168 column.replace(dot_or_id) 6169 return node 6170 6171 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6172 start = self._prev 6173 6174 # Not to be confused with TRUNCATE(number, decimals) function call 6175 if self._match(TokenType.L_PAREN): 6176 self._retreat(self._index - 2) 6177 return self._parse_function() 6178 6179 # Clickhouse supports TRUNCATE DATABASE as well 6180 is_database = self._match(TokenType.DATABASE) 6181 6182 self._match(TokenType.TABLE) 6183 6184 exists = self._parse_exists(not_=False) 6185 6186 expressions = self._parse_csv( 6187 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6188 ) 6189 6190 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6191 6192 if self._match_text_seq("RESTART", "IDENTITY"): 6193 identity = "RESTART" 6194 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6195 identity = "CONTINUE" 6196 else: 6197 identity = None 6198 6199 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6200 option = self._prev.text 6201 else: 6202 option = None 6203 6204 partition = self._parse_partition() 6205 6206 # Fallback case 6207 if 
self._curr: 6208 return self._parse_as_command(start) 6209 6210 return self.expression( 6211 exp.TruncateTable, 6212 expressions=expressions, 6213 is_database=is_database, 6214 exists=exists, 6215 cluster=cluster, 6216 identity=identity, 6217 option=option, 6218 partition=partition, 6219 ) 6220 6221 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6222 this = self._parse_ordered(self._parse_opclass) 6223 6224 if not self._match(TokenType.WITH): 6225 return this 6226 6227 op = self._parse_var(any_token=True) 6228 6229 return self.expression(exp.WithOperator, this=this, op=op)
The Parser consumes the list of tokens produced by the Tokenizer and builds a parsed syntax tree from them.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1128 def __init__( 1129 self, 1130 error_level: t.Optional[ErrorLevel] = None, 1131 error_message_context: int = 100, 1132 max_errors: int = 3, 1133 dialect: DialectType = None, 1134 ): 1135 from sqlglot.dialects import Dialect 1136 1137 self.error_level = error_level or ErrorLevel.IMMEDIATE 1138 self.error_message_context = error_message_context 1139 self.max_errors = max_errors 1140 self.dialect = Dialect.get_or_raise(dialect) 1141 self.reset()
1153 def parse( 1154 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1155 ) -> t.List[t.Optional[exp.Expression]]: 1156 """ 1157 Parses a list of tokens and returns a list of syntax trees, one tree 1158 per parsed SQL statement. 1159 1160 Args: 1161 raw_tokens: The list of tokens. 1162 sql: The original SQL string, used to produce helpful debug messages. 1163 1164 Returns: 1165 The list of the produced syntax trees. 1166 """ 1167 return self._parse( 1168 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1169 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1171 def parse_into( 1172 self, 1173 expression_types: exp.IntoType, 1174 raw_tokens: t.List[Token], 1175 sql: t.Optional[str] = None, 1176 ) -> t.List[t.Optional[exp.Expression]]: 1177 """ 1178 Parses a list of tokens into a given Expression type. If a collection of Expression 1179 types is given instead, this method will try to parse the token list into each one 1180 of them, stopping at the first for which the parsing succeeds. 1181 1182 Args: 1183 expression_types: The expression type(s) to try and parse the token list into. 1184 raw_tokens: The list of tokens. 1185 sql: The original SQL string, used to produce helpful debug messages. 1186 1187 Returns: 1188 The target Expression. 1189 """ 1190 errors = [] 1191 for expression_type in ensure_list(expression_types): 1192 parser = self.EXPRESSION_PARSERS.get(expression_type) 1193 if not parser: 1194 raise TypeError(f"No parser registered for {expression_type}") 1195 1196 try: 1197 return self._parse(parser, raw_tokens, sql) 1198 except ParseError as e: 1199 e.errors[0]["into_expression"] = expression_type 1200 errors.append(e) 1201 1202 raise ParseError( 1203 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1204 errors=merge_errors(errors), 1205 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1242 def check_errors(self) -> None: 1243 """Logs or raises any found errors, depending on the chosen error level setting.""" 1244 if self.error_level == ErrorLevel.WARN: 1245 for error in self.errors: 1246 logger.error(str(error)) 1247 elif self.error_level == ErrorLevel.RAISE and self.errors: 1248 raise ParseError( 1249 concat_messages(self.errors, self.max_errors), 1250 errors=merge_errors(self.errors), 1251 )
Logs or raises any found errors, depending on the chosen error level setting.
1253 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1254 """ 1255 Appends an error in the list of recorded errors or raises it, depending on the chosen 1256 error level setting. 1257 """ 1258 token = token or self._curr or self._prev or Token.string("") 1259 start = token.start 1260 end = token.end + 1 1261 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1262 highlight = self.sql[start:end] 1263 end_context = self.sql[end : end + self.error_message_context] 1264 1265 error = ParseError.new( 1266 f"{message}. Line {token.line}, Col: {token.col}.\n" 1267 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1268 description=message, 1269 line=token.line, 1270 col=token.col, 1271 start_context=start_context, 1272 highlight=highlight, 1273 end_context=end_context, 1274 ) 1275 1276 if self.error_level == ErrorLevel.IMMEDIATE: 1277 raise error 1278 1279 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1281 def expression( 1282 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1283 ) -> E: 1284 """ 1285 Creates a new, validated Expression. 1286 1287 Args: 1288 exp_class: The expression class to instantiate. 1289 comments: An optional list of comments to attach to the expression. 1290 kwargs: The arguments to set for the expression along with their respective values. 1291 1292 Returns: 1293 The target expression. 1294 """ 1295 instance = exp_class(**kwargs) 1296 instance.add_comments(comments) if comments else self._add_comments(instance) 1297 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1304 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1305 """ 1306 Validates an Expression, making sure that all its mandatory arguments are set. 1307 1308 Args: 1309 expression: The expression to validate. 1310 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1311 1312 Returns: 1313 The validated expression. 1314 """ 1315 if self.error_level != ErrorLevel.IGNORE: 1316 for error_message in expression.error_messages(args): 1317 self.raise_error(error_message) 1318 1319 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.